Mercurial > repos > iuc > humann
diff humann.xml @ 0:65c80ca30373 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 077b8f34e081e6c427acb0fde0fbb97d1b241e0b"
author | iuc |
---|---|
date | Wed, 12 May 2021 09:06:30 +0000 |
parents | |
children | 6b7622dda516 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/humann.xml Wed May 12 09:06:30 2021 +0000 @@ -0,0 +1,991 @@ +<tool id="humann" name="HUMAnN" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>to profile presence/absence and abundance of microbial pathways and gene families</description> + <macros> + <import>macros.xml</import> + <xml name="prescreen"> + <section name="prescreen" title="Prescreen / Identifying community species" expanded="true"> + <conditional name="metaphlan_db"> + <param name="selector" type="select" label="Database with clade-specific marker genes"> + <option value="cached" selected="true">Locally cached</option> + <option value="history">From history</option> + </param> + <when value="cached"> + <param name="cached_db" label="Cached database with clade-specific marker genes" type="select"> + <options from_data_table="metaphlan_database"> + <validator message="No MetaPhlAn database is available" type="no_options" /> + </options> + </param> + </when> + <when value="history"> + <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/> + <param argument="--mpa_pkl" type="data" format="json" label="Metadata associate to the database with clade-specific marker genes from history"/> + </when> + </conditional> + <param argument="--prescreen-threshold" type="float" value="0.01" min="0" max="100" label="Minimum percentage of reads matching a species"/> + <!-- add metaphlan options --> + </section> + </xml> + <token name="@PRESCREEN_PREPARE@"><![CDATA[ + #if $wf.prescreen.metaphlan_db.selector == "history" + mkdir metaphlan_db + && + bowtie2-build '$wf.prescreen.metaphlan_db.bowtie2db' 'metaphlan_db/custom_db-v30' + && + python '$__tool_directory__/customizemetadata.py' + transform_json_to_pkl + --json '$wf.prescreen.metaphlan_db.mpa_pkl' + --pkl 'metaphlan_db/custom_db-v30.pkl' + && + #end if + ]]></token> + <token name="@PRESCREEN_RUN@"><![CDATA[ + #set 
$metaphlan_option = "-t rel_ab" + #if $wf.prescreen.metaphlan_db.selector == "history" + #set $metaphlan_option += " --bowtie2db metaphlan_db/" + #set $metaphlan_option += " --index custom_db-v30" + #else + #set $metaphlan_option += " --bowtie2db %s" % $wf.prescreen.metaphlan_db.cached_db.fields.path + #set $metaphlan_option += " --index %s" % $wf.prescreen.metaphlan_db.cached_db.fields.dbkey + #end if + --metaphlan-options="$metaphlan_option" + --prescreen-threshold $wf.prescreen.prescreen_threshold + ]]></token> + <xml name="nucleotide_database"> + <param argument="--nucleotide-database" type="data_collection" collection_type="list" format="fasta" label="Nucleotide database from history" help="Each file must be named: ^[g__].[s__]"/> + </xml> + <xml name="nucleotide_search"> + <section name="nucleotide_search" title="Nucleotide search / Mapping reads to community pangenomes" expanded="true"> + <conditional name="nucleotide_db"> + <param name="selector" type="select" label="Nucleotide database" help="For locally cached databases, you need first to execute the tool to download HUMAnN2 databases"> + <option value="cached" selected="true">Locally cached</option> + <option value="history">From history</option> + </param> + <when value="cached"> + <param name="nucleotide_database" type="select" label="Nucleotide database"> + <options from_data_table="humann_nucleotide_database"> + <validator message="No nucleotide database is available" type="no_options" /> + </options> + </param> + </when> + <when value="history"> + <expand macro="nucleotide_database"/> + </when> + </conditional> + <!-- add bowtie2 options --> + <param argument="--nucleotide-identity-threshold" type="float" value="0" min="0" max="100" + label="Identity threshold for nucleotide alignments"/> + <param argument="--nucleotide-subject-coverage-threshold" type="float" value="50" min="0" max="100" + label="Subject coverage threshold for nucleotide alignments"/> + <param 
argument="--nucleotide-query-coverage-threshold" type="float" value="90" min="0" max="100" + label="Query coverage threshold for nucleotide alignments"/> + </section> + </xml> + <token name="@NUCLEOTIDE_SEARCH_PREPARE@"><![CDATA[ + #if $wf.nucleotide_search.nucleotide_db.selector == 'history' + mkdir nucleotide_db + && + #for $f in $wf.nucleotide_search.nucleotide_db.nucleotide_database: + ln -s '$f' 'nucleotide_db/${re.sub('[^\w\-_.]', '_', f.element_identifier)}.v296_201901b' && + #end for + #end if + ]]></token> + <token name="@NUCLEOTIDE_SEARCH_RUN@"><![CDATA[ + #if $wf.nucleotide_search.nucleotide_db.selector == 'history' + --nucleotide-database nucleotide_db + #else + --nucleotide-database '$wf.nucleotide_search.nucleotide_db.nucleotide_database.fields.path' + #end if + --nucleotide-identity-threshold $wf.nucleotide_search.nucleotide_identity_threshold + --nucleotide-subject-coverage-threshold $wf.nucleotide_search.nucleotide_subject_coverage_threshold + --nucleotide-query-coverage-threshold $wf.nucleotide_search.nucleotide_query_coverage_threshold + ]]></token> + <xml name="translated_search"> + <section name="translated_search" title="Translated search / Aligning unmapped reads to a protein database" expanded="true"> + <conditional name="protein_db"> + <param name="selector" type="select" label="Protein database" help="For locally cached databases, you need first to execute the tool to download HUMAnN2 databases"> + <option value="cached" selected="true">Locally cached</option> + <option value="history">From history</option> + </param> + <when value="cached"> + <param name="protein_database" type="select" label="Protein database"> + <options from_data_table="humann_protein_database"> + <validator message="No protein database is available" type="no_options" /> + </options> + </param> + </when> + <when value="history"> + <param argument="--protein-database" type="data" format="fasta" label="Protein database from history"/> + <param argument="--search-mode" 
type="select" label="Search mode"> + <option value="uniref50">UniRef50</option> + <option value="uniref90" selected="true">UniRef90</option> + </param> + </when> + </conditional> + <param argument="--evalue" type="float" value="1" label="E-value threshold to use with the translated search"/> + <!-- add diamond options --> + <param argument="--translated-identity-threshold" type="float" min="0" max="100" optional="true" + label="Identity threshold for translated alignments" + help="It is tuned automatically (based on uniref mode) unless a custom value is specified"/> + <param argument="--translated-subject-coverage-threshold" type="float" value="50" min="0" max="100" + label="Subject coverage threshold for translated alignments"/> + <param argument="--translated-query-coverage-threshold" type="float" value="90" min="0" max="100" + label="Query coverage threshold for translated alignments"/> + </section> + </xml> + <token name="@TRANSLATED_SEARCH_PREPARE@"><![CDATA[ + #if $wf.translated_search.protein_db.selector == 'history' + mkdir protein_db + && + diamond makedb + --in '$wf.translated_search.protein_db.protein_database' + --db 'protein_db/protein-db-201901b' + --threads "\${GALAXY_SLOTS:-4}" + && + #end if + ]]></token> + <!-- humann arguments for the translated search step; the identity cutoff is only passed when the user supplied a value, using the same flag name that the translated_identity_threshold param declares --> + <token name="@TRANSLATED_SEARCH_RUN@"><![CDATA[ + --translated-alignment 'diamond' + #if $wf.translated_search.protein_db.selector == 'history' + --protein-database protein_db + --search-mode '$wf.translated_search.protein_db.search_mode' + #else + --protein-database '$wf.translated_search.protein_db.protein_database.fields.path' + #if 'uniref50' in $wf.translated_search.protein_db.protein_database.fields.dbkey + --search-mode 'uniref50' + #else + --search-mode 'uniref90' + #end if + #end if + --evalue $wf.translated_search.evalue + #if str($wf.translated_search.translated_identity_threshold) != '' + --translated-identity-threshold $wf.translated_search.translated_identity_threshold + #end if + --translated-subject-coverage-threshold 
$wf.translated_search.translated_subject_coverage_threshold + --translated-query-coverage-threshold $wf.translated_search.translated_query_coverage_threshold + ]]></token> + </macros> + <expand macro="edam_ontology"/> + <expand macro="requirements"/> + <expand macro="version"/> + <!-- Cheetah template for the job: maps the Galaxy datatype of the input to a humann input format (tabular abundance tables are passed as genetable), emits the database preparation steps needed by the selected workflow mode, then calls humann. An unsupported datatype prints a message on stderr and exits with status 1 before humann is reached. --> + <command detect_errors="exit_code"><![CDATA[ +#import re +#if $in.input.ext.startswith("fasta") + #set ext="fasta" +#else if $in.input.ext.startswith("fastq") + #set ext="fastq" +#else if $in.input.ext.endswith("bam") + #set ext="bam" +#else if $in.input.ext == 'sam' + #set ext="sam" +#else if $in.input.ext == 'biom1' + #set ext="biom" +#else if $in.input.ext in ('tabular', 'tsv') + #set ext="genetable" +#else + #set ext="unknown" + >&2 echo "unknown extension $in.input.ext"; + exit 1; +#end if +#if $in.input.ext.endswith(".gz") + #set ext+=".gz" +#end if + +#if $wf.selector == 'bypass_prescreen' + @NUCLEOTIDE_SEARCH_PREPARE@ + @TRANSLATED_SEARCH_PREPARE@ +#else if $wf.selector == 'bypass_taxonomic_profiling' + @NUCLEOTIDE_SEARCH_PREPARE@ + @TRANSLATED_SEARCH_PREPARE@ +#else if $wf.selector == 'bypass_nucleotide_index' + @NUCLEOTIDE_SEARCH_PREPARE@ + @TRANSLATED_SEARCH_PREPARE@ +#else if $wf.selector == 'bypass_nucleotide_search' + @TRANSLATED_SEARCH_PREPARE@ +#else if $wf.selector == 'bypass_translated_search' + @PRESCREEN_PREPARE@ + @NUCLEOTIDE_SEARCH_PREPARE@ +#else if $wf.selector == 'none' + @PRESCREEN_PREPARE@ + @NUCLEOTIDE_SEARCH_PREPARE@ + @TRANSLATED_SEARCH_PREPARE@ +#end if + +humann + --input '$input' + --input-format $ext + -o 'output' +#if $wf.selector == 'bypass_prescreen' + --bypass-prescreen + @NUCLEOTIDE_SEARCH_RUN@ + @TRANSLATED_SEARCH_RUN@ +#else if $wf.selector == 'bypass_taxonomic_profiling' + --taxonomic-profile '$wf.taxonomic_profile' + @NUCLEOTIDE_SEARCH_RUN@ + @TRANSLATED_SEARCH_RUN@ +#else if $wf.selector == 'bypass_nucleotide_index' + --bypass-nucleotide-index + @NUCLEOTIDE_SEARCH_RUN@ + @TRANSLATED_SEARCH_RUN@ +#else if $wf.selector == 'bypass_nucleotide_search' + --bypass-nucleotide-search + @TRANSLATED_SEARCH_RUN@ +#else if $wf.selector == 
'bypass_translated_search' + --bypass-translated-search + @PRESCREEN_RUN@ + @NUCLEOTIDE_SEARCH_RUN@ +#else if $wf.selector == 'none' + @PRESCREEN_RUN@ + @NUCLEOTIDE_SEARCH_RUN@ + @TRANSLATED_SEARCH_RUN@ +#end if + --gap-fill '$g_p_quant.gap_fill' + --minpath '$g_p_quant.minpath' + --pathways '$g_p_quant.pathways' + --xipe '$g_p_quant.xipe' + --annotation-gene-index $g_p_quant.annotation_gene_index +#if $g_p_quant.id_mapping + --id-mapping '$g_p_quant.id_mapping' +#end if + --log-level 'DEBUG' + --o-log '$log' + --output-basename '$out.output_basename' + --output-format '$out.output_format' + --output-max-decimals $out.output_max_decimals + $out.remove_column_description_output + $out.remove_stratified_output + --threads "\${GALAXY_SLOTS:-4}" + --memory-use minimum + ]]></command> + <inputs> + <conditional name="in"> + <param name="selector" type="select" label="Input(s)"> + <option value="raw" selected="true">Quality-controlled shotgun sequencing reads (metagenome (DNA reads) or metatranscriptome (RNA reads))</option> + <option value="mapping">Pre-computed mappings of reads to database sequences</option> + <option value="abundance">Pre-computed (typically gene) abundance tables</option> + </param> + <when value="raw"> + <param name="input" type="data" format="fastq,fastq.gz,fasta,fasta.gz" label="Paired-end Fasta/FastQ files should be merged first"/> + </when> + <when value="mapping"> + <param name="input" type="data" format="sam,bam" label="Pre-computed mappings of reads to database sequences"/> + </when> + <when value="abundance"> + <param name="input" type="data" format="tabular,tsv,biom1" label="Pre-computed (typically gene) abundance tables"/> + </when> + </conditional> + <conditional name="wf"> + <param name="selector" type="select" label="Steps"> + <option value="bypass_prescreen">Bypass the prescreen step and run on the full ChocoPhlAn database (--bypass-prescreen)</option> + <option value="bypass_taxonomic_profiling">Bypass the taxonomic profiling step and 
creates a custom ChocoPhlAn database of the species</option> + <option value="bypass_nucleotide_index">Starts the workflow with the nucleotide alignment step using the provided indexed database (--bypass-nucleotide-index)</option> + <option value="bypass_nucleotide_search">Bypass all of the alignment steps before the translated search (--bypass-nucleotide-search)</option> + <option value="bypass_translated_search">Run all of the alignment steps except the translated search (--bypass-translated-search)</option> + <option value="none" selected="true">Run the full workflow steps</option> + </param> + <!-- one branch per workflow mode; each branch expands only the prescreen / nucleotide / translated sections that the mode still uses --> + <when value="bypass_prescreen"> + <expand macro="nucleotide_search"/> + <expand macro="translated_search"/> + </when> + <when value="bypass_taxonomic_profiling"> + <param argument="--taxonomic-profile" type="data" format="tabular,txt" label="Taxonomic profile file"/> + <expand macro="nucleotide_search"/> + <expand macro="translated_search"/> + </when> + <when value="bypass_nucleotide_index"> + <expand macro="nucleotide_search"/> + <expand macro="translated_search"/> + </when> + <when value="bypass_nucleotide_search"> + <expand macro="translated_search"/> + </when> + <when value="bypass_translated_search"> + <expand macro="prescreen"/> + <expand macro="nucleotide_search"/> + </when> + <when value="none"> + <expand macro="prescreen"/> + <expand macro="nucleotide_search"/> + <expand macro="translated_search"/> + </when> + </conditional> + <section name="g_p_quant" title="Gene and pathway quantification" expanded="true"> + <param argument="--gap-fill" type="boolean" truevalue="on" falsevalue="off" checked="true" label="Use gap fill computation?"/> + <param argument="--minpath" type="boolean" truevalue="on" falsevalue="off" checked="true" label="Use minpath computation?"/> + <param argument="--pathways" type="select" label="Database to use for pathway computations"> + <option value="metacyc" selected="true">MetaCyc</option> + <option value="unipathway">UniPathway</option> + 
</param> + <param argument="--xipe" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Use xipe computation?"/> + <param argument="--annotation-gene-index" type="integer" value="3" label="Index of the gene in the sequence annotation"/> + <param argument="--id-mapping" type="data" format="tsv" optional="true" label="id mapping file for alignments"/> + </section> + <section name="out" title="Outputs" expanded="true"> + <param argument="--output-basename" type="text" value="humann" label="basename"> + <sanitizer invalid_char=""> + <valid initial="string.ascii_letters,string.digits"> + <add value="_" /> + <add value="-" /> + </valid> + </sanitizer> + <validator type="empty_field" /> + </param> + <param argument="--output-format" type="select" label="Format of the output files"> + <option value="tsv" selected="true">Tabular</option> + <option value="biom">BIOM</option> + </param> + <param argument="--output-max-decimals" type="integer" value="10" label="Number of decimals to output"/> + <param argument="--remove-column-description-output" type="boolean" truevalue="--remove-column-description-output" falsevalue="" checked="false" label="Remove description in the output column?"/> + <param argument="--remove-stratified-output" type="boolean" truevalue="--remove-stratified-output" falsevalue="" checked="false" label="Remove stratification from output?"/> + <param name="intermediate_temp" type="select" multiple="true" label="Intermediate output files"> + <option value="metaphlan_bowtie2">MetaPhlAn Bowtie2 output</option> + <option value="metaphlan_bugs_list">MetaPhlAn bugs list</option> + <option value="bowtie2_alignment">Bowtie2 alignment results</option> + <option value="bowtie2_reduced_alignment">Bowtie2 reduced alignment results</option> + <option value="bowtie2_unaligned">Unaligned reads after Bowtie2</option> + <option value="custom_chocophlan_database">Custom ChocoPhlAn database</option> + <option value="diamond_aligned">Translated alignment 
results</option> + <option value="diamond_unaligned">Translated alignment unaligned reads</option> + </param> + </section> + </inputs> + <outputs> + <data format="tabular" name="gene_families_tsv" from_work_dir="output/*_genefamilies.tsv" label="${tool.name} on ${on_string}: Gene families and their abundance" > + <filter>out['output_format'] == "tsv"</filter> + </data> + <data format="biom1" name="gene_families_biom" from_work_dir="output/*_genefamilies.biom" label="${tool.name} on ${on_string}: Gene families and their abundance" > + <filter>out['output_format'] == "biom"</filter> + </data> + <data format="tabular" name="pathcoverage_tsv" from_work_dir="output/*_pathcoverage.tsv" label="${tool.name} on ${on_string}: Pathways and their coverage" > + <filter>out['output_format'] == "tsv"</filter> + </data> + <data format="biom1" name="pathcoverage_biom" from_work_dir="output/*_pathcoverage.biom" label="${tool.name} on ${on_string}: Pathways and their coverage" > + <filter>out['output_format'] == "biom"</filter> + </data> + <data format="tabular" name="pathabundance_tsv" from_work_dir="output/*_pathabundance.tsv" label="${tool.name} on ${on_string}: Pathways and their abundance" > + <filter>out['output_format'] == "tsv"</filter> + </data> + <data format="biom1" name="pathabundance_biom" from_work_dir="output/*_pathabundance.biom" label="${tool.name} on ${on_string}: Pathways and their abundance" > + <filter>out['output_format'] == "biom"</filter> + </data> + <data format="txt" name="log" label="${tool.name} on ${on_string}: Log"/> + <data format="tabular" name="metaphlan_bowtie2" from_work_dir="output/*_humann_temp/*_metaphlan_bowtie2.txt" label="${tool.name} on ${on_string}: MetaPhlAn Bowtie2 output" > + <filter>"metaphlan_bowtie2" in out['intermediate_temp']</filter> + </data> + <data format="tabular" name="metaphlan_bugs_list" from_work_dir="output/*_humann_temp/*_metaphlan_bugs_list.tsv" label="${tool.name} on ${on_string}: MetaPhlAn bugs list" > + 
<filter>"metaphlan_bugs_list" in out['intermediate_temp']</filter> + </data> + <data format="sam" name="bowtie2_alignment" from_work_dir="output/*_humann_temp/*_bowtie2_aligned.sam" label="${tool.name} on ${on_string}: Bowtie2 alignment results" > + <filter>"bowtie2_alignment" in out['intermediate_temp']</filter> + </data> + <data format="tabular" name="bowtie2_reduced_alignment" from_work_dir="output/*_humann_temp/*_bowtie2_aligned.tsv" label="${tool.name} on ${on_string}: Bowtie2 reduced alignment results" > + <filter>"bowtie2_reduced_alignment" in out['intermediate_temp']</filter> + </data> + <data format="fasta" name="bowtie2_unaligned" from_work_dir="output/*_humann_temp/*_bowtie2_unaligned.fa" label="${tool.name} on ${on_string}: Unaligned reads after Bowtie2" > + <filter>"bowtie2_unaligned" in out['intermediate_temp']</filter> + </data> + <data format="fasta" name="custom_chocophlan_database" from_work_dir="output/*_humann_temp/*_custom_chocophlan_database.ffn" label="${tool.name} on ${on_string}: Custom ChocoPhlAn database" > + <filter>"custom_chocophlan_database" in out['intermediate_temp']</filter> + </data> + <data format="tabular" name="diamond_aligned" from_work_dir="output/*_humann_temp/*_diamond_aligned.tsv" label="${tool.name} on ${on_string}: Translated alignment results" > + <filter>"diamond_aligned" in out['intermediate_temp']</filter> + </data> + <data format="fasta" name="diamond_unaligned" from_work_dir="output/*_humann_temp/*_diamond_unaligned.fa" label="${tool.name} on ${on_string}: Translated alignment unaligned reads" > + <filter>"diamond_unaligned" in out['intermediate_temp']</filter> + </data> + </outputs> + <tests> + <test expect_num_outputs="12"> + <conditional name="in"> + <!-- raw fasta file --> + <param name="selector" value="raw"/> + <param name="input" value="demo.fastq.gz"/> + </conditional> + <conditional name="wf"> + <!-- full workflow --> + <param name="selector" value="none"/> + <section name="prescreen"> + <conditional 
name="metaphlan_db"> + <param name="selector" value="history"/> + <param name="bowtie2db" value="test-db/metaphlan-db/demo-db-v30.fasta"/> + <param name="mpa_pkl" value="test-db/metaphlan-db/demo-db-v30.json"/> + </conditional> + <param name="prescreen_threshold" value="0.01"/> + </section> + <section name="nucleotide_search"> + <conditional name="nucleotide_db"> + <param name="selector" value="history"/> + <param name="nucleotide_database"> + <collection type="list"> + <element name="g__Bacteroides.s__Bacteroides_stercoris.centroids" ftype="fasta.gz" value="test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v296_201901b.ffn.gz" /> + <element name="g__Bacteroides.s__Bacteroides_vulgatus.centroids" ftype="fasta.gz" value="test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v296_201901b.ffn.gz" /> + </collection> + </param> + </conditional> + <param name="nucleotide_identity_threshold" value="0"/> + <param name="nucleotide_subject_coverage_threshold" value="50"/> + <param name="nucleotide_query_coverage_threshold" value="90"/> + </section> + <section name="translated_search"> + <conditional name="protein_db"> + <param name="selector" value="history"/> + <param name="protein_database" value="test-db/protein-db/uniref90_demo_prots_v201901b.fasta"/> + <param name="search_mode" value="uniref90"/> + </conditional> + <param name="evalue" value="1"/> + <param name="translated_subject_coverage_threshold" value="50"/> + <param name="translated_query_coverage_threshold" value="90"/> + </section> + </conditional> + <section name="g_p_quant"> + <param name="gap_fill" value="true"/> + <param name="minpath" value="true"/> + <param name="pathways" value="metacyc"/> + <param name="xipe" value="false"/> + <param name="annotation_gene_index" value="3"/> + </section> + <section name="out"> + <!-- intermediate files --> + <param name="output_basename" value="humann"/> + <param name="log_level" value="DEBUG"/> + <param name="output_format" 
value="tsv"/> + <param name="output_max_decimals" value="10"/> + <param name="remove_column_description_output" value="false"/> + <param name="remove_stratified_output" value="false"/> + <param name="intermediate_temp" + value="metaphlan_bowtie2,metaphlan_bugs_list,bowtie2_alignment,bowtie2_reduced_alignment,bowtie2_unaligned,custom_chocophlan_database,diamond_aligned,diamond_unaligned"/> + </section> + <!-- the three result tables are compared by size against reference files and checked for expected text and column count --> + <output name="gene_families_tsv" ftype="tabular" value="demo_genefamilies.tsv" compare="sim_size"> + <assert_contents> + <has_text text="humann_Abundance-RPKs"/> + <has_text text="UniRef90_A0A078RDY6|g__Bacteroides.s__Bacteroides_vulgatus"/> + <has_n_columns n="2"/> + </assert_contents> + </output> + <output name="pathcoverage_tsv" ftype="tabular" value="demo_pathcoverage.tsv" compare="sim_size"> + <assert_contents> + <has_text text="humann_Coverage"/> + <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/> + <has_n_columns n="2"/> + </assert_contents> + </output> + <output name="pathabundance_tsv" ftype="tabular" value="demo_pathabundance.tsv" compare="sim_size"> + <assert_contents> + <has_text text="humann_Abundance"/> + <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/> + <has_n_columns n="2"/> + </assert_contents> + </output> + <output name="log" ftype="txt"> + <assert_contents> + <has_text text="DATABASE SETTINGS"/> + <has_text text="humann.utilities"/> + <has_text text="humann_genefamilies"/> + <has_text text="humann_pathabundance"/> + <has_text text="humann_pathcoverage"/> + <has_text text="g__Bacteroides.s__Bacteroides_dorei"/> + </assert_contents> + </output> + <output name="metaphlan_bowtie2" ftype="tabular"> + <assert_contents> + <has_text text="s__Bacteroides_dorei_read000116"/> + <has_text text="357276__I9R1V6__DXD47_04125"/> + <has_text text="s__Bacteroides_dorei_read000129"/> + <has_text text="357276__B6W1Y5__IY41_11405"/> + </assert_contents> + </output> + <output 
name="metaphlan_bugs_list" ftype="tabular"> + <assert_contents> + <has_text text="relative_abundance"/> + <has_text text="k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_vulgatus"/> + <has_text text="k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_dorei"/> + </assert_contents> + </output> + <output name="bowtie2_alignment" ftype="sam"> + <assert_contents> + <has_text text="SN:821__F3PQ30__HMPREF9446_00822|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_F3PQ30|UniRef50_F3PQ30|510"/> + <has_text text="s__Bacteroides_dorei_read009840"/> + <has_text text="PN:bowtie2"/> + <has_text text="LN:1281"/> + </assert_contents> + </output> + <output name="bowtie2_reduced_alignment" ftype="tabular"> + <assert_contents> + <has_text text="s__Bacteroides_dorei_read000001"/> + <has_text text="821__A6L5K0__BVU_3338|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A6L5K0|UniRef50_A6L5K0|468"/> + <has_text text="s__Bacteroides_vulgatus_read003845"/> + <has_text text="821__A0A396BBC3__DXC03_14350|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A0A396BBC3|UniRef50_A0A174FNA3|2934"/> + </assert_contents> + </output> + <output name="bowtie2_unaligned" ftype="fasta"> + <assert_contents> + <has_text text=">s__Bacteroides_dorei_read000001|100"/> + <has_text text=">s__Bacteroides_dorei_read000002|100"/> + <has_text text=">unclassified_read000971|100"/> + <has_text text=">s__Bacteroides_vulgatus_read004473|100"/> + </assert_contents> + </output> + <output name="custom_chocophlan_database" ftype="fasta"> + <assert_contents> + <has_text 
text=">821__F3PQ30__HMPREF9446_00822|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_F3PQ30|UniRef50_F3PQ30|510"/> + <has_text text=">821__F3PUY1__HMPREF9446_02555|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_F3PUY1|UniRef50_A0A3E5DX68|411"/> + <has_text text=">821__A0A3E4KCH0__DXD33_19495|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A0A3E4KCH0|UniRef50_F3PP72|3582"/> + </assert_contents> + </output> + <output name="diamond_aligned" ftype="tabular"> + <assert_contents> + <has_text text="UniRef90_Z5XVM9|969"/> + <has_text text="s__Bacteroides_vulgatus_read"/> + <has_text text="s__Bacteroides_vulgatus_read"/> + <has_text text="UniRef90_Y0KEF3|618"/> + </assert_contents> + </output> + <output name="diamond_unaligned" ftype="fasta"> + <assert_contents> + <has_text text=">s__Bacteroides_dorei_read000001|100"/> + <has_text text=">s__Bacteroides_vulgatus_read006412|100"/> + <has_text text=">unclassified_read000867|100"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="4"> + <conditional name="in"> + <!-- fastq file --> + <param name="selector" value="raw"/> + <param name="input" value="demo.fasta.gz"/> + </conditional> + <conditional name="wf"> + <!-- bypass_prescreen --> + <param name="selector" value="bypass_prescreen"/> + <section name="nucleotide_search"> + <conditional name="nucleotide_db"> + <param name="selector" value="cached"/> + <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/> + </conditional> + <param name="nucleotide_identity_threshold" value="0"/> + <param name="nucleotide_subject_coverage_threshold" value="50"/> + <param name="nucleotide_query_coverage_threshold" value="90"/> + </section> + <section name="translated_search"> + <conditional name="protein_db"> + <param 
name="selector" value="cached"/> + <param name="protein_database" value="uniref-DEMO_diamond-20210421"/> + </conditional> + <param name="evalue" value="1"/> + <param name="translated_subject_coverage_threshold" value="50"/> + <param name="translated_query_coverage_threshold" value="90"/> + </section> + </conditional> + <section name="g_p_quant"> + <param name="gap_fill" value="true"/> + <param name="minpath" value="true"/> + <param name="pathways" value="metacyc"/> + <param name="xipe" value="false"/> + <param name="annotation_gene_index" value="3"/> + </section> + <section name="out"> + <!-- Biom --> + <param name="output_basename" value="humann"/> + <param name="log_level" value="DEBUG"/> + <param name="output_format" value="biom"/> + <param name="output_max_decimals" value="10"/> + <param name="remove_column_description_output" value="false"/> + <param name="remove_stratified_output" value="false"/> + <param name="intermediate_temp" value=""/> + </section> + <!-- BIOM outputs are only checked for characteristic strings, not compared against reference files --> + <output name="gene_families_biom" ftype="biom1"> + <assert_contents> + <has_text text="http://biom-format.org"/> + <has_text text="UniRef90_A0A396BPQ7|g__Bacteroides.s__Bacteroides_vulgatus"/> + <has_text text="UniRef90_W8YTG4|unclassified"/> + </assert_contents> + </output> + <output name="pathcoverage_biom" ftype="biom1"> + <assert_contents> + <has_text text="TREE"/> + <has_text text="format-url"/> + <has_text text="http://biom-format.org"/> + <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/> + <has_text text="humann_Coverage"/> + </assert_contents> + </output> + <output name="pathabundance_biom" ftype="biom1"> + <assert_contents> + <has_text text="TREE"/> + <has_text text="format-url"/> + <has_text text="http://biom-format.org"/> + <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/> + <has_text text="humann_Abundance"/> + </assert_contents> + </output> + <output name="log" ftype="txt"> + <assert_contents> + <has_text text="Running 
bowtie2-build ........"/> + <has_text text="Total bugs from nucleotide alignment: 2"/> + <has_text text="Total gene families from nucleotide alignment: "/> + <has_text text="Aligning to reference database: "/> + <has_text text="Total gene families after translated alignment: "/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="4"> + <conditional name="in"> + <param name="selector" value="raw"/> + <param name="input" value="demo.fasta.gz"/> + </conditional> + <conditional name="wf"> + <!-- bypass_taxonomic_profiling --> + <param name="selector" value="bypass_taxonomic_profiling"/> + <param name="taxonomic_profile" value="demo-taxonomic-profile.tabular"/> + <section name="nucleotide_search"> + <conditional name="nucleotide_db"> + <param name="selector" value="cached"/> + <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/> + </conditional> + <param name="nucleotide_identity_threshold" value="0"/> + <param name="nucleotide_subject_coverage_threshold" value="50"/> + <param name="nucleotide_query_coverage_threshold" value="90"/> + </section> + <section name="translated_search"> + <conditional name="protein_db"> + <param name="selector" value="cached"/> + <param name="protein_database" value="uniref-DEMO_diamond-20210421"/> + </conditional> + <param name="evalue" value="1"/> + <param name="translated_subject_coverage_threshold" value="50"/> + <param name="translated_query_coverage_threshold" value="90"/> + </section> + </conditional> + <section name="g_p_quant"> + <param name="gap_fill" value="true"/> + <param name="minpath" value="true"/> + <param name="pathways" value="metacyc"/> + <param name="xipe" value="false"/> + <param name="annotation_gene_index" value="3"/> + </section> + <section name="out"> + <param name="output_basename" value="humann"/> + <param name="log_level" value="DEBUG"/> + <param name="output_format" value="tsv"/> + <param name="output_max_decimals" value="10"/> + <param name="remove_column_description_output" 
value="false"/> + <param name="remove_statified_output" value="false"/> + <param name="intermediate_temp" value=""/> + </section> + <!-- Tabular outputs: each of the three tables is asserted to have exactly 2 columns --> + <output name="gene_families_tsv" ftype="tabular"> + <assert_contents> + <has_text text="humann_Abundance-RPKs"/> + <has_text text="UniRef90_G1UL42|g__Bacteroides.s__Bacteroides_dorei"/> + <has_text text="UniRef90_A0A078RDY6|g__Bacteroides.s__Bacteroides_vulgatus"/> + <has_n_columns n="2"/> + </assert_contents> + </output> + <output name="pathcoverage_tsv" ftype="tabular"> + <assert_contents> + <has_text text="humann_Coverage"/> + <has_text text="UNINTEGRATED|unclassified"/> + <has_text text="PWY-4203: volatile benzenoid biosynthesis I (ester formation)|unclassified"/> + <has_n_columns n="2"/> + </assert_contents> + </output> + <output name="pathabundance_tsv" ftype="tabular"> + <assert_contents> + <has_text text="humann_Abundance"/> + <has_text text="UNINTEGRATED|unclassified"/> + <has_text text="PWY-4203: volatile benzenoid biosynthesis I (ester formation)|unclassified"/> + + <has_n_columns n="2"/> + </assert_contents> + </output> + <output name="log" ftype="txt"> + <assert_contents> + <has_text text="Found g__Bacteroides.s__Bacteroides_vulgatus : "/> + <has_text text="Total species selected from prescreen: 2"/> + <has_text text="Total bugs from nucleotide alignment: 2"/> + <has_text text="g__Bacteroides.s__Bacteroides_vulgatus: "/> + <has_text text="g__Bacteroides.s__Bacteroides_dorei: "/> + <has_text text="Total gene families from nucleotide alignment: "/> + <has_text text="Total bugs after translated alignment: 3"/> + <has_text text="Total gene families after translated alignment"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="4"> + <conditional name="in"> + <!-- mapping SAM file --> + <param name="selector" value="mapping"/> + <param name="input" value="demo.sam"/> + </conditional> + <conditional name="wf"> + <!-- bypass_nucleotide_index --> + <param name="selector" value="bypass_nucleotide_index"/> + 
<section name="nucleotide_search"> + <conditional name="nucleotide_db"> + <param name="selector" value="cached"/> + <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/> + </conditional> + <param name="nucleotide_identity_threshold" value="0"/> + <param name="nucleotide_subject_coverage_threshold" value="50"/> + <param name="nucleotide_query_coverage_threshold" value="90"/> + </section> + <section name="translated_search"> + <conditional name="protein_db"> + <param name="selector" value="cached"/> + <param name="protein_database" value="uniref-DEMO_diamond-20210421"/> + </conditional> + <param name="evalue" value="1"/> + <param name="translated_subject_coverage_threshold" value="50"/> + <param name="translated_query_coverage_threshold" value="90"/> + </section> + </conditional> + <section name="g_p_quant"> + <param name="gap_fill" value="true"/> + <param name="minpath" value="true"/> + <param name="pathways" value="metacyc"/> + <param name="xipe" value="false"/> + <param name="annotation_gene_index" value="3"/> + </section> + <section name="out"> + <!-- TSV --> + <param name="output_basename" value="humann"/> + <param name="log_level" value="DEBUG"/> + <param name="output_format" value="tsv"/> + <param name="output_max_decimals" value="10"/> + <param name="remove_column_description_output" value="false"/> + <param name="remove_statified_output" value="false"/> + <param name="intermediate_temp" value=""/> + </section> + <output name="gene_families_tsv" ftype="tabular"> + <assert_contents> + <has_text text="UniRef90_R6HHA8|g__Bacteroides.s__Bacteroides_dorei"/> + <has_text text="UniRef90_unknown|g__Bacteroides.s__Bacteroides_vulgatus"/> + <has_n_columns n="2"/> + </assert_contents> + </output> + <output name="pathcoverage_tsv" ftype="tabular"> + <assert_contents> + <has_text text="UNMAPPED"/> + <has_text text="UNINTEGRATED"/> + <has_n_columns n="2"/> + </assert_contents> + </output> + <output name="pathabundance_tsv" ftype="tabular"> + <assert_contents> + <has_text 
text="UNMAPPED"/> + <has_text text="UNINTEGRATED"/> + <has_n_columns n="2"/> + </assert_contents> + </output> + <output name="log" ftype="txt"> + <assert_contents> + <has_text text="Process the sam mapping results"/> + <has_text text="Computing gene families"/> + <has_text text="Computing pathways abundance and coverage"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="4"> + <conditional name="in"> + <!-- raw FASTQ file --> + <param name="selector" value="raw"/> + <param name="input" value="demo.fastq.gz"/> + </conditional> + <conditional name="wf"> + <!-- bypass_nucleotide_search --> + <param name="selector" value="bypass_nucleotide_search"/> + <section name="translated_search"> + <conditional name="protein_db"> + <param name="selector" value="cached"/> + <param name="protein_database" value="uniref-DEMO_diamond-20210421"/> + </conditional> + <param name="evalue" value="1"/> + <param name="translated_subject_coverage_threshold" value="50"/> + <param name="translated_query_coverage_threshold" value="90"/> + </section> + </conditional> + <section name="g_p_quant"> + <param name="gap_fill" value="true"/> + <param name="minpath" value="true"/> + <param name="pathways" value="metacyc"/> + <param name="xipe" value="false"/> + <param name="annotation_gene_index" value="3"/> + </section> + <section name="out"> + <!-- TSV --> + <param name="output_basename" value="humann"/> + <param name="log_level" value="DEBUG"/> + <param name="output_format" value="tsv"/> + <param name="output_max_decimals" value="10"/> + <param name="remove_column_description_output" value="false"/> + <param name="remove_statified_output" value="false"/> + <param name="intermediate_temp" value=""/> + </section> + <output name="gene_families_tsv" ftype="tabular"> + <assert_contents> + <has_text text="humann_Abundance-RPKs"/> + <has_text text="UniRef90_Q9ZUH4|unclassified"/> + <has_n_columns n="2"/> + </assert_contents> + </output> + <output name="pathcoverage_tsv" ftype="tabular"> + 
<assert_contents> + <has_text text="humann_Coverage"/> + <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/> + <has_n_columns n="2"/> + </assert_contents> + </output> + <output name="pathabundance_tsv" ftype="tabular"> + <assert_contents> + <has_text text="humann_Abundance"/> + <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/> + <has_n_columns n="2"/> + </assert_contents> + </output> + <output name="log" ftype="txt"> + <assert_contents> + <has_text text="Total bugs after translated alignment: 1"/> + <has_text text="unclassified: "/> + <has_text text="Unaligned reads after translated alignment: "/> + <has_text text="Total gene families"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="4"> + <conditional name="in"> + <!-- raw FASTQ file --> + <param name="selector" value="raw"/> + <param name="input" value="demo.fastq.gz"/> + </conditional> + <conditional name="wf"> + <!-- bypass_translated_search --> + <param name="selector" value="bypass_translated_search"/> + <section name="prescreen"> + <conditional name="metaphlan_db"> + <param name="selector" value="cached"/> + <param name="cached_db" value="metaphlan-demo-db-20210421"/> + </conditional> + <param name="prescreen_threshold" value="0.01"/> + </section> + <section name="nucleotide_search"> + <conditional name="nucleotide_db"> + <param name="selector" value="cached"/> + <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/> + </conditional> + <param name="nucleotide_identity_threshold" value="0"/> + <param name="nucleotide_subject_coverage_threshold" value="50"/> + <param name="nucleotide_query_coverage_threshold" value="90"/> + </section> + </conditional> + <section name="g_p_quant"> + <param name="gap_fill" value="true"/> + <param name="minpath" value="true"/> + <param name="pathways" value="metacyc"/> + <param name="xipe" value="false"/> + <param name="annotation_gene_index" value="3"/> + </section> + 
<section name="out"> + <param name="output_basename" value="newname"/> + <param name="log_level" value="DEBUG"/> + <param name="output_format" value="tsv"/> + <param name="output_max_decimals" value="10"/> + <param name="remove_column_description_output" value="false"/> + <param name="remove_statified_output" value="false"/> + <param name="intermediate_temp" value=""/> + </section> + <output name="gene_families_tsv" ftype="tabular"> + <assert_contents> + <has_text text="newname_Abundance-RPKs"/> + <has_text text="UniRef90_G1UL42|g__Bacteroides.s__Bacteroides_dorei"/> + <has_n_columns n="2"/> + </assert_contents> + </output> + <output name="pathcoverage_tsv" ftype="tabular"> + <assert_contents> + <has_text text="newname_Coverage"/> + <has_text text="UNMAPPED"/> + <has_text text="UNINTEGRATED"/> + <has_n_columns n="2"/> + </assert_contents> + </output> + <output name="pathabundance_tsv" ftype="tabular"> + <assert_contents> + <has_text text="newname_Abundance"/> + <has_text text="UNMAPPED"/> + <has_text text="UNINTEGRATED"/> + <has_n_columns n="2"/> + </assert_contents> + </output> + <output name="log" ftype="txt"> + <assert_contents> + <has_text text="Total bugs from nucleotide alignment: 2"/> + <has_text text="g__Bacteroides.s__Bacteroides_vulgatus: 1195 hits"/> + <has_text text="g__Bacteroides.s__Bacteroides_dorei: 1260 hits"/> + <has_text text="Total gene families from nucleotide alignment: 545"/> + <has_text text="Bypass translated search"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +@HELP_HEADER@ + +This tool corresponds to the main tool in the HUMAnN pipeline: + + +1. Taxonomic prescreen + + Reads are mapped (with MetaPhlAn) to clade-specific marker genes to rapidly identify community species + +2. Pangenome search (nucleotide search) + + Reads are mapped (with Bowtie2) to pangenomes of identified species + +3. Translated search + + Unclassified reads are aligned to a comprehensive and non-redundant protein database + +4. 
Gene family and pathway quantification + + - Gene abundance estimation + + Mapping results are processed to estimate per-species and community total gene family abundance, weighting by + + - alignment quality + - gene length + - gene coverage + + - Per-species and community-level metabolic network reconstruction + + Genes are mapped to metabolic reactions to identify a parsimonious set of pathways that explains each species' observed reactions + + Pathway abundance and coverage are quantified by: + + 1. optimizing over alternative subpathways + 2. imputing abundance for conspicuously depleted reactions + + +Inputs +====== + +HUMAnN can start from a few different types of input data each in a few different types of formats: + +- Quality-controlled shotgun sequencing reads + + This is the most common starting point: a metagenome (DNA reads) or metatranscriptome (RNA reads) + +- Pre-computed mappings of reads to database sequences + +- Pre-computed (typically gene) abundance tables + + +HUMAnN uses 3 reference databases. +Locally cached databases have to be downloaded before using them (using the dedicated tool). Custom databases can also be used after upload. + +Outputs +======= + +HUMAnN creates three output files: + +- Gene families and their abundance +- Pathways and their abundance +- Pathways and their coverage + +Ten intermediate temp output files can also be retrieved. + + ]]></help> + <expand macro="citations"/> +</tool>