humann: humann.xml comparison

comparison humann.xml @ 0:65c80ca30373 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 077b8f34e081e6c427acb0fde0fbb97d1b241e0b"

author	iuc
date	Wed, 12 May 2021 09:06:30 +0000
parents
children	6b7622dda516

comparison

equal deleted inserted replaced

--1:000000000000
+:65c80ca30373
+<tool id="humann" name="HUMAnN" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+<description>to profile presence/absence and abundance of microbial pathways and gene families</description>
+<macros>
+<import>macros.xml</import>
+<xml name="prescreen">
+    <!-- Inputs of the prescreen step: choice of the marker-gene database and the species threshold. -->
+    <section name="prescreen" title="Prescreen / Identifying community species" expanded="true">
+        <conditional name="metaphlan_db">
+            <param name="selector" type="select" label="Database with clade-specific marker genes">
+                <option value="cached" selected="true">Locally cached</option>
+                <option value="history">From history</option>
+            </param>
+            <when value="cached">
+                <!-- Entries come from the metaphlan_database data table; selection is refused when it is empty. -->
+                <param name="cached_db" label="Cached database with clade-specific marker genes" type="select">
+                    <options from_data_table="metaphlan_database">
+                        <validator message="No MetaPhlAn database is available" type="no_options" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <!-- FASTA of marker genes plus its JSON metadata; both are read by the prescreen prepare token. -->
+                <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/>
+                <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated with the database with clade-specific marker genes from history"/>
+            </when>
+        </conditional>
+        <param argument="--prescreen-threshold" type="float" value="0.01" min="0" max="100" label="Minimum percentage of reads matching a species"/>
+        <!-- add metaphlan options -->
+    </section>
+</xml>
+<token name="@PRESCREEN_PREPARE@"><![CDATA[
+## Shell prelude for the prescreen step: only needed when the MetaPhlAn database comes from the history.
+#if $wf.prescreen.metaphlan_db.selector == "history"
+    mkdir metaphlan_db &&
+    ## Bowtie2 index built from the marker-gene FASTA, written under metaphlan_db/
+    bowtie2-build '$wf.prescreen.metaphlan_db.bowtie2db' 'metaphlan_db/custom_db-v30' &&
+    ## Helper script shipped with the tool: JSON metadata in, .pkl next to the index out
+    python '$__tool_directory__/customizemetadata.py' transform_json_to_pkl
+        --json '$wf.prescreen.metaphlan_db.mpa_pkl'
+        --pkl 'metaphlan_db/custom_db-v30.pkl'
+    &&
+#end if
+]]></token>
+<token name="@PRESCREEN_RUN@"><![CDATA[
+## Resolve where the MetaPhlAn database lives and what its index is called,
+## then hand both to MetaPhlAn through a single quoted option string.
+#if $wf.prescreen.metaphlan_db.selector == "history"
+    #set $mpa_dir = "metaphlan_db/"
+    #set $mpa_index = "custom_db-v30"
+#else
+    #set $mpa_dir = $wf.prescreen.metaphlan_db.cached_db.fields.path
+    #set $mpa_index = $wf.prescreen.metaphlan_db.cached_db.fields.dbkey
+#end if
+#set $metaphlan_option = "-t rel_ab --bowtie2db %s --index %s" % ($mpa_dir, $mpa_index)
+--metaphlan-options="$metaphlan_option"
+--prescreen-threshold $wf.prescreen.prescreen_threshold
+]]></token>
+<xml name="nucleotide_database">
+    <!-- List collection of FASTA datasets from the history used as the nucleotide database. -->
+    <param argument="--nucleotide-database"
+           type="data_collection"
+           collection_type="list"
+           format="fasta"
+           label="Nucleotide database from history"
+           help="Each file must be named: ^[g__].[s__]"/>
+</xml>
+<xml name="nucleotide_search">
+    <!-- Inputs of the nucleotide search step: database choice (cached or history) and alignment thresholds. -->
+    <section name="nucleotide_search" title="Nucleotide search / Mapping reads to community pangenomes" expanded="true">
+        <conditional name="nucleotide_db">
+            <param name="selector" type="select" label="Nucleotide database" help="For locally cached databases, you need first to execute the tool to download HUMAnN databases">
+                <option value="cached" selected="true">Locally cached</option>
+                <option value="history">From history</option>
+            </param>
+            <when value="cached">
+                <!-- Entries come from the humann_nucleotide_database data table. -->
+                <param name="nucleotide_database" type="select" label="Nucleotide database">
+                    <options from_data_table="humann_nucleotide_database">
+                        <validator message="No nucleotide database is available" type="no_options" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <expand macro="nucleotide_database"/>
+            </when>
+        </conditional>
+        <!-- add bowtie2 options -->
+        <!-- All three thresholds are percentages (0-100). -->
+        <param argument="--nucleotide-identity-threshold" type="float" value="0" min="0" max="100"
+               label="Identity threshold for nucleotide alignments"/>
+        <param argument="--nucleotide-subject-coverage-threshold" type="float" value="50" min="0" max="100"
+               label="Subject coverage threshold for nucleotide alignments"/>
+        <param argument="--nucleotide-query-coverage-threshold" type="float" value="90" min="0" max="100"
+               label="Query coverage threshold for nucleotide alignments"/>
+    </section>
+</xml>
+<token name="@NUCLEOTIDE_SEARCH_PREPARE@"><![CDATA[
+## Shell prelude for the nucleotide search: expose a history collection as a directory of symlinks.
+#if $wf.nucleotide_search.nucleotide_db.selector == 'history'
+    mkdir nucleotide_db &&
+    #for $db_file in $wf.nucleotide_search.nucleotide_db.nucleotide_database:
+        ## Characters outside word characters, dash, underscore and dot are replaced by underscores
+        #set $safe_name = re.sub('[^\w\-_.]', '_', $db_file.element_identifier)
+        ln -s '$db_file' 'nucleotide_db/${safe_name}.v296_201901b' &&
+    #end for
+#end if
+]]></token>
+<token name="@NUCLEOTIDE_SEARCH_RUN@"><![CDATA[
+## Command-line options of the nucleotide search step.
+#if $wf.nucleotide_search.nucleotide_db.selector != 'history'
+    ## Cached database: path column of the selected data table entry
+    --nucleotide-database '$wf.nucleotide_search.nucleotide_db.nucleotide_database.fields.path'
+#else
+    ## History database: directory of symlinks created by the matching prepare step
+    --nucleotide-database nucleotide_db
+#end if
+--nucleotide-identity-threshold $wf.nucleotide_search.nucleotide_identity_threshold
+--nucleotide-subject-coverage-threshold $wf.nucleotide_search.nucleotide_subject_coverage_threshold
+--nucleotide-query-coverage-threshold $wf.nucleotide_search.nucleotide_query_coverage_threshold
+]]></token>
+<xml name="translated_search">
+    <!-- Inputs of the translated search step: protein database choice, e-value and alignment thresholds. -->
+    <section name="translated_search" title="Translated search / Aligning unmapped reads to a protein database" expanded="true">
+        <conditional name="protein_db">
+            <param name="selector" type="select" label="Protein database" help="For locally cached databases, you need first to execute the tool to download HUMAnN databases">
+                <option value="cached" selected="true">Locally cached</option>
+                <option value="history">From history</option>
+            </param>
+            <when value="cached">
+                <!-- Entries come from the humann_protein_database data table. -->
+                <param name="protein_database" type="select" label="Protein database">
+                    <options from_data_table="humann_protein_database">
+                        <validator message="No protein database is available" type="no_options" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <!-- A history database carries no dbkey, so the search mode has to be chosen explicitly. -->
+                <param argument="--protein-database" type="data" format="fasta" label="Protein database from history"/>
+                <param argument="--search-mode" type="select" label="Search mode">
+                    <option value="uniref50">UniRef50</option>
+                    <option value="uniref90" selected="true">UniRef90</option>
+                </param>
+            </when>
+        </conditional>
+        <param argument="--evalue" type="float" value="1" label="E-value threshold to use with the translated search"/>
+        <!-- add diamond options -->
+        <!-- Optional: left empty, the option is not passed to the command line at all. -->
+        <param argument="--translated-identity-threshold" type="float" min="0" max="100" optional="true"
+               label="Identity threshold for translated alignments"
+               help="It is tuned automatically (based on uniref mode) unless a custom value is specified"/>
+        <param argument="--translated-subject-coverage-threshold" type="float" value="50" min="0" max="100"
+               label="Subject coverage threshold for translated alignments"/>
+        <param argument="--translated-query-coverage-threshold" type="float" value="90" min="0" max="100"
+               label="Query coverage threshold for translated alignments"/>
+    </section>
+</xml>
+<token name="@TRANSLATED_SEARCH_PREPARE@"><![CDATA[
+## Shell prelude for the translated search: a protein FASTA from the history is indexed with DIAMOND first.
+#if $wf.translated_search.protein_db.selector == 'history'
+    mkdir protein_db &&
+    diamond makedb
+        --in '$wf.translated_search.protein_db.protein_database'
+        --db 'protein_db/protein-db-201901b'
+        --threads "\${GALAXY_SLOTS:-4}"
+    &&
+#end if
+]]></token>
+<token name="@TRANSLATED_SEARCH_RUN@"><![CDATA[
+## Command-line options of the translated search step; DIAMOND is always the aligner.
+--translated-alignment 'diamond'
+#if $wf.translated_search.protein_db.selector == 'history'
+    ## History database: directory filled by the matching prepare step, search mode chosen by the user
+    --protein-database protein_db
+    --search-mode '$wf.translated_search.protein_db.search_mode'
+#else
+    --protein-database '$wf.translated_search.protein_db.protein_database.fields.path'
+    ## Cached database: the search mode is derived from the dbkey of the data table entry
+    #if 'uniref50' in $wf.translated_search.protein_db.protein_database.fields.dbkey
+        --search-mode 'uniref50'
+    #else
+        --search-mode 'uniref90'
+    #end if
+#end if
+--evalue $wf.translated_search.evalue
+## Optional parameter: forwarded only when a value was entered.
+## The option name must match the one declared on the parameter (translated-identity-threshold).
+#if str($wf.translated_search.translated_identity_threshold) != ''
+    --translated-identity-threshold $wf.translated_search.translated_identity_threshold
+#end if
+--translated-subject-coverage-threshold $wf.translated_search.translated_subject_coverage_threshold
+--translated-query-coverage-threshold $wf.translated_search.translated_query_coverage_threshold
+]]></token>
+</macros>
+<expand macro="edam_ontology"/>
+<expand macro="requirements"/>
+<expand macro="version"/>
+<command detect_errors="exit_code"><![CDATA[
+#import re
+## 1. Map the Galaxy datatype of the input dataset onto a value of the input-format option.
+#if $in.input.ext.startswith("fasta")
+    #set ext="fasta"
+#else if $in.input.ext.startswith("fastq")
+    #set ext="fastq"
+#else if $in.input.ext.endswith("bam")
+    #set ext="bam"
+#else if $in.input.ext == 'sam'
+    #set ext="sam"
+#else if $in.input.ext == 'biom1'
+    #set ext="biom"
+#else if $in.input.ext in ('tabular', 'tsv')
+    ## Abundance tables offered by the "abundance" input choice
+    #set ext="genetable"
+#else
+    ## Keep ext defined so the template still renders; the shell then reports the problem and stops.
+    #set ext="unknown"
+    >&2 echo "unknown extension $in.input.ext" && exit 1;
+#end if
+#if $in.input.ext.endswith(".gz")
+    #set ext+=".gz"
+#end if
+
+## 2. Preparation commands (custom databases from the history) for the steps that will actually run.
+#if $wf.selector == 'bypass_prescreen'
+    @NUCLEOTIDE_SEARCH_PREPARE@
+    @TRANSLATED_SEARCH_PREPARE@
+#else if $wf.selector == 'bypass_taxonomic_profiling'
+    @NUCLEOTIDE_SEARCH_PREPARE@
+    @TRANSLATED_SEARCH_PREPARE@
+#else if $wf.selector == 'bypass_nucleotide_index'
+    @NUCLEOTIDE_SEARCH_PREPARE@
+    @TRANSLATED_SEARCH_PREPARE@
+#else if $wf.selector == 'bypass_nucleotide_search'
+    @TRANSLATED_SEARCH_PREPARE@
+#else if $wf.selector == 'bypass_translated_search'
+    @PRESCREEN_PREPARE@
+    @NUCLEOTIDE_SEARCH_PREPARE@
+#else if $wf.selector == 'none'
+    @PRESCREEN_PREPARE@
+    @NUCLEOTIDE_SEARCH_PREPARE@
+    @TRANSLATED_SEARCH_PREPARE@
+#end if
+
+## 3. Main call. The input lives inside the "in" conditional, so it is referenced fully qualified.
+humann
+    --input '$in.input'
+    --input-format $ext
+    -o 'output'
+
+## Step-specific options, mirroring the preparation chain above
+#if $wf.selector == 'bypass_prescreen'
+    --bypass-prescreen
+    @NUCLEOTIDE_SEARCH_RUN@
+    @TRANSLATED_SEARCH_RUN@
+#else if $wf.selector == 'bypass_taxonomic_profiling'
+    --taxonomic-profile '$wf.taxonomic_profile'
+    @NUCLEOTIDE_SEARCH_RUN@
+    @TRANSLATED_SEARCH_RUN@
+#else if $wf.selector == 'bypass_nucleotide_index'
+    --bypass-nucleotide-index
+    @NUCLEOTIDE_SEARCH_RUN@
+    @TRANSLATED_SEARCH_RUN@
+#else if $wf.selector == 'bypass_nucleotide_search'
+    --bypass-nucleotide-search
+    @TRANSLATED_SEARCH_RUN@
+#else if $wf.selector == 'bypass_translated_search'
+    --bypass-translated-search
+    @PRESCREEN_RUN@
+    @NUCLEOTIDE_SEARCH_RUN@
+#else if $wf.selector == 'none'
+    @PRESCREEN_RUN@
+    @NUCLEOTIDE_SEARCH_RUN@
+    @TRANSLATED_SEARCH_RUN@
+#end if
+
+## Gene and pathway quantification
+    --gap-fill '$g_p_quant.gap_fill'
+    --minpath '$g_p_quant.minpath'
+    --pathways '$g_p_quant.pathways'
+    --xipe '$g_p_quant.xipe'
+    --annotation-gene-index $g_p_quant.annotation_gene_index
+#if $g_p_quant.id_mapping
+    --id-mapping '$g_p_quant.id_mapping'
+#end if
+
+## Logging goes to the dedicated log output dataset, always at DEBUG level
+    --log-level 'DEBUG'
+    --o-log '$log'
+
+## Output naming and formatting; the two boolean parameters expand to their flag or to nothing
+    --output-basename '$out.output_basename'
+    --output-format '$out.output_format'
+    --output-max-decimals $out.output_max_decimals
+    $out.remove_column_description_output
+    $out.remove_stratified_output
+    --threads "\${GALAXY_SLOTS:-4}"
+    --memory-use minimum
+]]></command>
+<inputs>
+<conditional name="in">
+    <!-- Kind of input; each branch declares a dataset parameter named "input" with its own accepted formats. -->
+    <param name="selector" type="select" label="Input(s)">
+        <option value="raw" selected="true">Quality-controlled shotgun sequencing reads (metagenome (DNA reads) or metatranscriptome (RNA reads))</option>
+        <option value="mapping">Pre-computed mappings of reads to database sequences</option>
+        <option value="abundance">Pre-computed (typically gene) abundance tables</option>
+    </param>
+    <when value="raw">
+        <param name="input"
+               type="data"
+               format="fastq,fastq.gz,fasta,fasta.gz"
+               label="Paired-end Fasta/FastQ files should be merged first"/>
+    </when>
+    <when value="mapping">
+        <param name="input"
+               type="data"
+               format="sam,bam"
+               label="Pre-computed mappings of reads to database sequences"/>
+    </when>
+    <when value="abundance">
+        <param name="input"
+               type="data"
+               format="tabular,tsv,biom1"
+               label="Pre-computed (typically gene) abundance tables"/>
+    </when>
+</conditional>
+<conditional name="wf">
+    <!-- Which workflow steps to run; each branch exposes only the sections of the steps that remain. -->
+    <param name="selector" type="select" label="Steps">
+        <option value="bypass_prescreen">Bypass the prescreen step and run on the full ChocoPhlAn database (--bypass-prescreen)</option>
+        <option value="bypass_taxonomic_profiling">Bypass the taxonomic profiling step and creates a custom ChocoPhlAn database of the species (--taxonomic-profile)</option>
+        <option value="bypass_nucleotide_index">Starts the workflow with the nucleotide alignment step using the provided indexed database (--bypass-nucleotide-index)</option>
+        <option value="bypass_nucleotide_search">Bypass all of the alignment steps before the translated search (--bypass-nucleotide-search)</option>
+        <option value="bypass_translated_search">Run all of the alignment steps except the translated search (--bypass-translated-search)</option>
+        <option value="none" selected="true">Run the full workflow steps</option>
+    </param>
+    <when value="bypass_prescreen">
+        <expand macro="nucleotide_search"/>
+        <expand macro="translated_search"/>
+    </when>
+    <when value="bypass_taxonomic_profiling">
+        <!-- The user-supplied profile replaces the prescreen step. -->
+        <param argument="--taxonomic-profile" type="data" format="tabular,txt" label="Taxonomic profile file"/>
+        <expand macro="nucleotide_search"/>
+        <expand macro="translated_search"/>
+    </when>
+    <when value="bypass_nucleotide_index">
+        <expand macro="nucleotide_search"/>
+        <expand macro="translated_search"/>
+    </when>
+    <when value="bypass_nucleotide_search">
+        <expand macro="translated_search"/>
+    </when>
+    <when value="bypass_translated_search">
+        <expand macro="prescreen"/>
+        <expand macro="nucleotide_search"/>
+    </when>
+    <when value="none">
+        <expand macro="prescreen"/>
+        <expand macro="nucleotide_search"/>
+        <expand macro="translated_search"/>
+    </when>
+</conditional>
+<section name="g_p_quant" title="Gene and pathway quantification" expanded="true">
+    <!-- The three boolean switches render as the literal words "on" / "off" on the command line. -->
+    <param argument="--gap-fill" type="boolean" checked="true" truevalue="on" falsevalue="off"
+           label="Use gap fill computation?"/>
+    <param argument="--minpath" type="boolean" checked="true" truevalue="on" falsevalue="off"
+           label="Use minpath computation?"/>
+    <param argument="--pathways" type="select" label="Database to use for pathway computations">
+        <option value="metacyc" selected="true">MetaCyc</option>
+        <option value="unipathway">UniPathway</option>
+    </param>
+    <param argument="--xipe" type="boolean" checked="false" truevalue="on" falsevalue="off"
+           label="Use xipe computation?"/>
+    <param argument="--annotation-gene-index" type="integer" value="3"
+           label="Index of the gene in the sequence annotation"/>
+    <!-- Optional dataset; the command only passes the option when one is provided. -->
+    <param argument="--id-mapping" type="data" format="tsv" optional="true"
+           label="id mapping file for alignments"/>
+</section>
+<section name="out" title="Outputs" expanded="true">
+    <!-- Basename of the result files: restricted to letters, digits, "_" and "-", and must not be empty. -->
+    <param argument="--output-basename" type="text" value="humann" label="basename">
+        <sanitizer invalid_char="">
+            <valid initial="string.ascii_letters,string.digits">
+                <add value="_" />
+                <add value="-" />
+            </valid>
+        </sanitizer>
+        <validator type="empty_field" />
+    </param>
+    <param argument="--output-format" type="select" label="Format of the output files">
+        <option value="tsv" selected="true">Tabular</option>
+        <option value="biom">BIOM</option>
+    </param>
+    <param argument="--output-max-decimals" type="integer" value="10" label="Number of decimals to output"/>
+    <!-- Flag-style booleans: the flag itself when checked, an empty string otherwise. -->
+    <param argument="--remove-column-description-output" type="boolean" checked="false"
+           truevalue="--remove-column-description-output" falsevalue=""
+           label="Remove description in the output column?"/>
+    <param argument="--remove-stratified-output" type="boolean" checked="false"
+           truevalue="--remove-stratified-output" falsevalue=""
+           label="Remove stratification from output?"/>
+    <!-- Each selected value enables the output dataset of the same name through its filter. -->
+    <param name="intermediate_temp" type="select" multiple="true" label="Intermediate output files">
+        <option value="metaphlan_bowtie2">MetaPhlAn Bowtie2 output</option>
+        <option value="metaphlan_bugs_list">MetaPhlAn bugs list</option>
+        <option value="bowtie2_alignment">Bowtie2 alignment results</option>
+        <option value="bowtie2_reduced_alignment">Bowtie2 reduced alignment results</option>
+        <option value="bowtie2_unaligned">Unaligned reads after Bowtie2</option>
+        <option value="custom_chocophlan_database">Custom ChocoPhlAn database</option>
+        <option value="diamond_aligned">Translated alignment results</option>
+        <option value="diamond_unaligned">Translated alignment unaligned reads</option>
+    </param>
+</section>
+</inputs>
+<outputs>
+    <!-- Main results: one tabular and one BIOM variant each, selected by the chosen output format. -->
+    <data name="gene_families_tsv" format="tabular" from_work_dir="output/*_genefamilies.tsv" label="${tool.name} on ${on_string}: Gene families and their abundance">
+        <filter>out['output_format'] == "tsv"</filter>
+    </data>
+    <data name="gene_families_biom" format="biom1" from_work_dir="output/*_genefamilies.biom" label="${tool.name} on ${on_string}: Gene families and their abundance">
+        <filter>out['output_format'] == "biom"</filter>
+    </data>
+    <data name="pathcoverage_tsv" format="tabular" from_work_dir="output/*_pathcoverage.tsv" label="${tool.name} on ${on_string}: Pathways and their coverage">
+        <filter>out['output_format'] == "tsv"</filter>
+    </data>
+    <data name="pathcoverage_biom" format="biom1" from_work_dir="output/*_pathcoverage.biom" label="${tool.name} on ${on_string}: Pathways and their coverage">
+        <filter>out['output_format'] == "biom"</filter>
+    </data>
+    <data name="pathabundance_tsv" format="tabular" from_work_dir="output/*_pathabundance.tsv" label="${tool.name} on ${on_string}: Pathways and their abundance">
+        <filter>out['output_format'] == "tsv"</filter>
+    </data>
+    <data name="pathabundance_biom" format="biom1" from_work_dir="output/*_pathabundance.biom" label="${tool.name} on ${on_string}: Pathways and their abundance">
+        <filter>out['output_format'] == "biom"</filter>
+    </data>
+    <!-- Log written directly by the tool; always produced. -->
+    <data name="log" format="txt" label="${tool.name} on ${on_string}: Log"/>
+    <!-- Intermediate files: each one is kept only when ticked in the intermediate_temp parameter. -->
+    <data name="metaphlan_bowtie2" format="tabular" from_work_dir="output/*_humann_temp/*_metaphlan_bowtie2.txt" label="${tool.name} on ${on_string}: MetaPhlAn Bowtie2 output">
+        <filter>"metaphlan_bowtie2" in out['intermediate_temp']</filter>
+    </data>
+    <data name="metaphlan_bugs_list" format="tabular" from_work_dir="output/*_humann_temp/*_metaphlan_bugs_list.tsv" label="${tool.name} on ${on_string}: MetaPhlAn bugs list">
+        <filter>"metaphlan_bugs_list" in out['intermediate_temp']</filter>
+    </data>
+    <data name="bowtie2_alignment" format="sam" from_work_dir="output/*_humann_temp/*_bowtie2_aligned.sam" label="${tool.name} on ${on_string}: Bowtie2 alignment results">
+        <filter>"bowtie2_alignment" in out['intermediate_temp']</filter>
+    </data>
+    <data name="bowtie2_reduced_alignment" format="tabular" from_work_dir="output/*_humann_temp/*_bowtie2_aligned.tsv" label="${tool.name} on ${on_string}: Bowtie2 reduced alignment results">
+        <filter>"bowtie2_reduced_alignment" in out['intermediate_temp']</filter>
+    </data>
+    <data name="bowtie2_unaligned" format="fasta" from_work_dir="output/*_humann_temp/*_bowtie2_unaligned.fa" label="${tool.name} on ${on_string}: Unaligned reads after Bowtie2">
+        <filter>"bowtie2_unaligned" in out['intermediate_temp']</filter>
+    </data>
+    <data name="custom_chocophlan_database" format="fasta" from_work_dir="output/*_humann_temp/*_custom_chocophlan_database.ffn" label="${tool.name} on ${on_string}: Custom ChocoPhlAn database">
+        <filter>"custom_chocophlan_database" in out['intermediate_temp']</filter>
+    </data>
+    <data name="diamond_aligned" format="tabular" from_work_dir="output/*_humann_temp/*_diamond_aligned.tsv" label="${tool.name} on ${on_string}: Translated alignment results">
+        <filter>"diamond_aligned" in out['intermediate_temp']</filter>
+    </data>
+    <data name="diamond_unaligned" format="fasta" from_work_dir="output/*_humann_temp/*_diamond_unaligned.fa" label="${tool.name} on ${on_string}: Translated alignment unaligned reads">
+        <filter>"diamond_unaligned" in out['intermediate_temp']</filter>
+    </data>
+</outputs>
+<tests>
+<test expect_num_outputs="12">
+    <!-- Full workflow on gzipped FASTQ reads with all databases taken from the history and
+         every intermediate file requested: 3 tabular results + log + 8 intermediates = 12 outputs. -->
+    <conditional name="in">
+        <!-- raw FASTQ file (gzipped) -->
+        <param name="selector" value="raw"/>
+        <param name="input" value="demo.fastq.gz"/>
+    </conditional>
+    <conditional name="wf">
+        <!-- full workflow -->
+        <param name="selector" value="none"/>
+        <section name="prescreen">
+            <conditional name="metaphlan_db">
+                <param name="selector" value="history"/>
+                <param name="bowtie2db" value="test-db/metaphlan-db/demo-db-v30.fasta"/>
+                <param name="mpa_pkl" value="test-db/metaphlan-db/demo-db-v30.json"/>
+            </conditional>
+            <param name="prescreen_threshold" value="0.01"/>
+        </section>
+        <section name="nucleotide_search">
+            <conditional name="nucleotide_db">
+                <param name="selector" value="history"/>
+                <param name="nucleotide_database">
+                    <collection type="list">
+                        <!-- NOTE(review): this element is named ..._stercoris but loads the ..._dorei file; confirm this is intended. -->
+                        <element name="g__Bacteroides.s__Bacteroides_stercoris.centroids" ftype="fasta.gz" value="test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v296_201901b.ffn.gz" />
+                        <element name="g__Bacteroides.s__Bacteroides_vulgatus.centroids" ftype="fasta.gz" value="test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v296_201901b.ffn.gz" />
+                    </collection>
+                </param>
+            </conditional>
+            <param name="nucleotide_identity_threshold" value="0"/>
+            <param name="nucleotide_subject_coverage_threshold" value="50"/>
+            <param name="nucleotide_query_coverage_threshold" value="90"/>
+        </section>
+        <section name="translated_search">
+            <conditional name="protein_db">
+                <param name="selector" value="history"/>
+                <param name="protein_database" value="test-db/protein-db/uniref90_demo_prots_v201901b.fasta"/>
+                <param name="search_mode" value="uniref90"/>
+            </conditional>
+            <param name="evalue" value="1"/>
+            <param name="translated_subject_coverage_threshold" value="50"/>
+            <param name="translated_query_coverage_threshold" value="90"/>
+        </section>
+    </conditional>
+    <section name="g_p_quant">
+        <param name="gap_fill" value="true"/>
+        <param name="minpath" value="true"/>
+        <param name="pathways" value="metacyc"/>
+        <param name="xipe" value="false"/>
+        <param name="annotation_gene_index" value="3"/>
+    </section>
+    <section name="out">
+        <!-- intermediate files; the log level is fixed in the command and is not a tool input -->
+        <param name="output_basename" value="humann"/>
+        <param name="output_format" value="tsv"/>
+        <param name="output_max_decimals" value="10"/>
+        <param name="remove_column_description_output" value="false"/>
+        <param name="remove_stratified_output" value="false"/>
+        <param name="intermediate_temp"
+               value="metaphlan_bowtie2,metaphlan_bugs_list,bowtie2_alignment,bowtie2_reduced_alignment,bowtie2_unaligned,custom_chocophlan_database,diamond_aligned,diamond_unaligned"/>
+    </section>
+    <output name="gene_families_tsv" ftype="tabular" value="demo_genefamilies.tsv" compare="sim_size">
+        <assert_contents>
+            <has_text text="humann_Abundance-RPKs"/>
+            <has_text text="UniRef90_A0A078RDY6|g__Bacteroides.s__Bacteroides_vulgatus"/>
+            <has_n_columns n="2"/>
+        </assert_contents>
+    </output>
+    <output name="pathcoverage_tsv" ftype="tabular" value="demo_pathcoverage.tsv" compare="sim_size">
+        <assert_contents>
+            <has_text text="humann_Coverage"/>
+            <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
+            <has_n_columns n="2"/>
+        </assert_contents>
+    </output>
+    <output name="pathabundance_tsv" ftype="tabular" value="demo_pathabundance.tsv" compare="sim_size">
+        <assert_contents>
+            <has_text text="humann_Abundance"/>
+            <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
+            <has_n_columns n="2"/>
+        </assert_contents>
+    </output>
+    <output name="log" ftype="txt">
+        <assert_contents>
+            <has_text text="DATABASE SETTINGS"/>
+            <has_text text="humann.utilities"/>
+            <has_text text="humann_genefamilies"/>
+            <has_text text="humann_pathabundance"/>
+            <has_text text="humann_pathcoverage"/>
+            <has_text text="g__Bacteroides.s__Bacteroides_dorei"/>
+        </assert_contents>
+    </output>
+    <output name="metaphlan_bowtie2" ftype="tabular">
+        <assert_contents>
+            <has_text text="s__Bacteroides_dorei_read000116"/>
+            <has_text text="357276__I9R1V6__DXD47_04125"/>
+            <has_text text="s__Bacteroides_dorei_read000129"/>
+            <has_text text="357276__B6W1Y5__IY41_11405"/>
+        </assert_contents>
+    </output>
+    <output name="metaphlan_bugs_list" ftype="tabular">
+        <assert_contents>
+            <has_text text="relative_abundance"/>
+            <has_text text="k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_vulgatus"/>
+            <has_text text="k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_dorei"/>
+        </assert_contents>
+    </output>
+    <output name="bowtie2_alignment" ftype="sam">
+        <assert_contents>
+            <has_text text="SN:821__F3PQ30__HMPREF9446_00822|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_F3PQ30|UniRef50_F3PQ30|510"/>
+            <has_text text="s__Bacteroides_dorei_read009840"/>
+            <has_text text="PN:bowtie2"/>
+            <has_text text="LN:1281"/>
+        </assert_contents>
+    </output>
+    <output name="bowtie2_reduced_alignment" ftype="tabular">
+        <assert_contents>
+            <has_text text="s__Bacteroides_dorei_read000001"/>
+            <has_text text="821__A6L5K0__BVU_3338|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A6L5K0|UniRef50_A6L5K0|468"/>
+            <has_text text="s__Bacteroides_vulgatus_read003845"/>
+            <has_text text="821__A0A396BBC3__DXC03_14350|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A0A396BBC3|UniRef50_A0A174FNA3|2934"/>
+        </assert_contents>
+    </output>
+    <output name="bowtie2_unaligned" ftype="fasta">
+        <assert_contents>
+            <has_text text=">s__Bacteroides_dorei_read000001|100"/>
+            <has_text text=">s__Bacteroides_dorei_read000002|100"/>
+            <has_text text=">unclassified_read000971|100"/>
+            <has_text text=">s__Bacteroides_vulgatus_read004473|100"/>
+        </assert_contents>
+    </output>
+    <output name="custom_chocophlan_database" ftype="fasta">
+        <assert_contents>
+            <has_text text=">821__F3PQ30__HMPREF9446_00822|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_F3PQ30|UniRef50_F3PQ30|510"/>
+            <has_text text=">821__F3PUY1__HMPREF9446_02555|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_F3PUY1|UniRef50_A0A3E5DX68|411"/>
+            <has_text text=">821__A0A3E4KCH0__DXD33_19495|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A0A3E4KCH0|UniRef50_F3PP72|3582"/>
+        </assert_contents>
+    </output>
+    <output name="diamond_aligned" ftype="tabular">
+        <assert_contents>
+            <has_text text="UniRef90_Z5XVM9|969"/>
+            <has_text text="s__Bacteroides_vulgatus_read"/>
+            <has_text text="s__Bacteroides_vulgatus_read"/>
+            <has_text text="UniRef90_Y0KEF3|618"/>
+        </assert_contents>
+    </output>
+    <output name="diamond_unaligned" ftype="fasta">
+        <assert_contents>
+            <has_text text=">s__Bacteroides_dorei_read000001|100"/>
+            <has_text text=">s__Bacteroides_vulgatus_read006412|100"/>
+            <has_text text=">unclassified_read000867|100"/>
+        </assert_contents>
+    </output>
+</test>
+<test expect_num_outputs="4">
+    <!-- Prescreen bypassed, cached demo databases, BIOM output: 3 BIOM results + log = 4 outputs. -->
+    <conditional name="in">
+        <!-- raw FASTA file (gzipped) -->
+        <param name="selector" value="raw"/>
+        <param name="input" value="demo.fasta.gz"/>
+    </conditional>
+    <conditional name="wf">
+        <!-- bypass_prescreen -->
+        <param name="selector" value="bypass_prescreen"/>
+        <section name="nucleotide_search">
+            <conditional name="nucleotide_db">
+                <param name="selector" value="cached"/>
+                <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/>
+            </conditional>
+            <param name="nucleotide_identity_threshold" value="0"/>
+            <param name="nucleotide_subject_coverage_threshold" value="50"/>
+            <param name="nucleotide_query_coverage_threshold" value="90"/>
+        </section>
+        <section name="translated_search">
+            <conditional name="protein_db">
+                <param name="selector" value="cached"/>
+                <param name="protein_database" value="uniref-DEMO_diamond-20210421"/>
+            </conditional>
+            <param name="evalue" value="1"/>
+            <param name="translated_subject_coverage_threshold" value="50"/>
+            <param name="translated_query_coverage_threshold" value="90"/>
+        </section>
+    </conditional>
+    <section name="g_p_quant">
+        <param name="gap_fill" value="true"/>
+        <param name="minpath" value="true"/>
+        <param name="pathways" value="metacyc"/>
+        <param name="xipe" value="false"/>
+        <param name="annotation_gene_index" value="3"/>
+    </section>
+    <section name="out">
+        <!-- Biom; the log level is fixed in the command and is not a tool input -->
+        <param name="output_basename" value="humann"/>
+        <param name="output_format" value="biom"/>
+        <param name="output_max_decimals" value="10"/>
+        <param name="remove_column_description_output" value="false"/>
+        <param name="remove_stratified_output" value="false"/>
+        <param name="intermediate_temp" value=""/>
+    </section>
+    <output name="gene_families_biom" ftype="biom1">
+        <assert_contents>
+            <has_text text="http://biom-format.org"/>
+            <has_text text="UniRef90_A0A396BPQ7|g__Bacteroides.s__Bacteroides_vulgatus"/>
+            <has_text text="UniRef90_W8YTG4|unclassified"/>
+        </assert_contents>
+    </output>
+    <output name="pathcoverage_biom" ftype="biom1">
+        <assert_contents>
+            <has_text text="TREE"/>
+            <has_text text="format-url"/>
+            <has_text text="http://biom-format.org"/>
+            <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
+            <has_text text="humann_Coverage"/>
+        </assert_contents>
+    </output>
+    <output name="pathabundance_biom" ftype="biom1">
+        <assert_contents>
+            <has_text text="TREE"/>
+            <has_text text="format-url"/>
+            <has_text text="http://biom-format.org"/>
+            <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
+            <has_text text="humann_Abundance"/>
+        </assert_contents>
+    </output>
+    <output name="log" ftype="txt">
+        <assert_contents>
+            <has_text text="Running bowtie2-build ........"/>
+            <has_text text="Total bugs from nucleotide alignment: 2"/>
+            <has_text text="Total gene families from nucleotide alignment: "/>
+            <has_text text="Aligning to reference database: "/>
+            <has_text text="Total gene families after translated alignment: "/>
+        </assert_contents>
+    </output>
+</test>
+<test expect_num_outputs="4">
+    <!-- Taxonomic profile supplied by the user, cached demo databases, tabular output: 3 results + log = 4 outputs. -->
+    <conditional name="in">
+        <!-- raw FASTA file (gzipped) -->
+        <param name="selector" value="raw"/>
+        <param name="input" value="demo.fasta.gz"/>
+    </conditional>
+    <conditional name="wf">
+        <!-- bypass_taxonomic_profiling -->
+        <param name="selector" value="bypass_taxonomic_profiling"/>
+        <param name="taxonomic_profile" value="demo-taxonomic-profile.tabular"/>
+        <section name="nucleotide_search">
+            <conditional name="nucleotide_db">
+                <param name="selector" value="cached"/>
+                <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/>
+            </conditional>
+            <param name="nucleotide_identity_threshold" value="0"/>
+            <param name="nucleotide_subject_coverage_threshold" value="50"/>
+            <param name="nucleotide_query_coverage_threshold" value="90"/>
+        </section>
+        <section name="translated_search">
+            <conditional name="protein_db">
+                <param name="selector" value="cached"/>
+                <param name="protein_database" value="uniref-DEMO_diamond-20210421"/>
+            </conditional>
+            <param name="evalue" value="1"/>
+            <param name="translated_subject_coverage_threshold" value="50"/>
+            <param name="translated_query_coverage_threshold" value="90"/>
+        </section>
+    </conditional>
+    <section name="g_p_quant">
+        <param name="gap_fill" value="true"/>
+        <param name="minpath" value="true"/>
+        <param name="pathways" value="metacyc"/>
+        <param name="xipe" value="false"/>
+        <param name="annotation_gene_index" value="3"/>
+    </section>
+    <section name="out">
+        <!-- the log level is fixed in the command and is not a tool input -->
+        <param name="output_basename" value="humann"/>
+        <param name="output_format" value="tsv"/>
+        <param name="output_max_decimals" value="10"/>
+        <param name="remove_column_description_output" value="false"/>
+        <param name="remove_stratified_output" value="false"/>
+        <param name="intermediate_temp" value=""/>
+    </section>
+    <output name="gene_families_tsv" ftype="tabular">
+        <assert_contents>
+            <has_text text="humann_Abundance-RPKs"/>
+            <has_text text="UniRef90_G1UL42|g__Bacteroides.s__Bacteroides_dorei"/>
+            <has_text text="UniRef90_A0A078RDY6|g__Bacteroides.s__Bacteroides_vulgatus"/>
+            <has_n_columns n="2"/>
+        </assert_contents>
+    </output>
+    <output name="pathcoverage_tsv" ftype="tabular">
+        <assert_contents>
+            <has_text text="humann_Coverage"/>
+            <has_text text="UNINTEGRATED|unclassified"/>
+            <has_text text="PWY-4203: volatile benzenoid biosynthesis I (ester formation)|unclassified"/>
+            <has_n_columns n="2"/>
+        </assert_contents>
+    </output>
+    <output name="pathabundance_tsv" ftype="tabular">
+        <assert_contents>
+            <has_text text="humann_Abundance"/>
+            <has_text text="UNINTEGRATED|unclassified"/>
+            <has_text text="PWY-4203: volatile benzenoid biosynthesis I (ester formation)|unclassified"/>
+            <has_n_columns n="2"/>
+        </assert_contents>
+    </output>
+    <output name="log" ftype="txt">
+        <assert_contents>
+            <has_text text="Found g__Bacteroides.s__Bacteroides_vulgatus : "/>
+            <has_text text="Total species selected from prescreen: 2"/>
+            <has_text text="Total bugs from nucleotide alignment: 2"/>
+            <has_text text="g__Bacteroides.s__Bacteroides_vulgatus: "/>
+            <has_text text="g__Bacteroides.s__Bacteroides_dorei: "/>
+            <has_text text="Total gene families from nucleotide alignment: "/>
+            <has_text text="Total bugs after translated alignment: 3"/>
+            <has_text text="Total gene families after translated alignment"/>
+        </assert_contents>
+    </output>
+</test>
+<test expect_num_outputs="4"><!-- Test: start from a pre-computed SAM mapping with the bypass_nucleotide_index workflow; four outputs are expected -->
+<conditional name="in">
+<!-- Input: pre-computed read mapping supplied as a SAM file (demo.sam) -->
+<param name="selector" value="mapping"/>
+<param name="input" value="demo.sam"/>
+</conditional>
+<conditional name="wf">
+<!-- Workflow selector set to bypass_nucleotide_index; both searches use the locally cached DEMO databases named below -->
+<param name="selector" value="bypass_nucleotide_index"/>
+<section name="nucleotide_search">
+<conditional name="nucleotide_db">
+<param name="selector" value="cached"/>
+<param name="nucleotide_database" value="chocophlan-DEMO-20210421"/>
+</conditional>
+<param name="nucleotide_identity_threshold" value="0"/>
+<param name="nucleotide_subject_coverage_threshold" value="50"/>
+<param name="nucleotide_query_coverage_threshold" value="90"/>
+</section>
+<section name="translated_search">
+<conditional name="protein_db">
+<param name="selector" value="cached"/>
+<param name="protein_database" value="uniref-DEMO_diamond-20210421"/>
+</conditional>
+<param name="evalue" value="1"/>
+<param name="translated_subject_coverage_threshold" value="50"/>
+<param name="translated_query_coverage_threshold" value="90"/>
+</section>
+</conditional>
+<section name="g_p_quant">
+<param name="gap_fill" value="true"/>
+<param name="minpath" value="true"/>
+<param name="pathways" value="metacyc"/>
+<param name="xipe" value="false"/>
+<param name="annotation_gene_index" value="3"/>
+</section>
+<section name="out">
+<param name="output_basename" value="humann"/>
+<param name="log_level" value="DEBUG"/>
+<param name="output_format" value="tsv"/>
+<param name="output_max_decimals" value="10"/>
+<param name="remove_column_description_output" value="false"/>
+<param name="remove_statified_output" value="false"/>
+<param name="intermediate_temp" value=""/>
+</section>
+<output name="gene_families_tsv" ftype="tabular">
+<assert_contents>
+<has_text text="UniRef90_R6HHA8|g__Bacteroides.s__Bacteroides_dorei"/>
+<has_text text="UniRef90_unknown|g__Bacteroides.s__Bacteroides_vulgatus"/>
+<has_n_columns n="2"/>
+</assert_contents>
+</output>
+<output name="pathcoverage_tsv" ftype="tabular">
+<assert_contents>
+<has_text text="UNMAPPED"/>
+<has_text text="UNINTEGRATED"/>
+<has_n_columns n="2"/>
+</assert_contents>
+</output>
+<output name="pathabundance_tsv" ftype="tabular">
+<assert_contents>
+<has_text text="UNMAPPED"/>
+<has_text text="UNINTEGRATED"/>
+<has_n_columns n="2"/>
+</assert_contents>
+</output>
+<output name="log" ftype="txt">
+<assert_contents>
+<has_text text="Process the sam mapping results"/>
+<has_text text="Computing gene families"/>
+<has_text text="Computing pathways abundance and coverage"/>
+</assert_contents>
+</output>
+</test>
+<test expect_num_outputs="4"><!-- Test: raw reads with the bypass_nucleotide_search workflow, so only the translated_search section is configured; four outputs are expected -->
+<conditional name="in">
+<!-- Input: raw reads; the fixture demo.fastq.gz appears to be gzip-compressed FASTQ (not FASTA), judging by its name -->
+<param name="selector" value="raw"/>
+<param name="input" value="demo.fastq.gz"/>
+</conditional>
+<conditional name="wf">
+<!-- Workflow selector set to bypass_nucleotide_search; the translated search uses the locally cached DEMO UniRef database -->
+<param name="selector" value="bypass_nucleotide_search"/>
+<section name="translated_search">
+<conditional name="protein_db">
+<param name="selector" value="cached"/>
+<param name="protein_database" value="uniref-DEMO_diamond-20210421"/>
+</conditional>
+<param name="evalue" value="1"/>
+<param name="translated_subject_coverage_threshold" value="50"/>
+<param name="translated_query_coverage_threshold" value="90"/>
+</section>
+</conditional>
+<section name="g_p_quant">
+<param name="gap_fill" value="true"/>
+<param name="minpath" value="true"/>
+<param name="pathways" value="metacyc"/>
+<param name="xipe" value="false"/>
+<param name="annotation_gene_index" value="3"/>
+</section>
+<section name="out">
+<param name="output_basename" value="humann"/>
+<param name="log_level" value="DEBUG"/>
+<param name="output_format" value="tsv"/>
+<param name="output_max_decimals" value="10"/>
+<param name="remove_column_description_output" value="false"/>
+<param name="remove_statified_output" value="false"/>
+<param name="intermediate_temp" value=""/>
+</section>
+<output name="gene_families_tsv" ftype="tabular">
+<assert_contents>
+<has_text text="humann_Abundance-RPKs"/>
+<has_text text="UniRef90_Q9ZUH4|unclassified"/>
+<has_n_columns n="2"/>
+</assert_contents>
+</output>
+<output name="pathcoverage_tsv" ftype="tabular">
+<assert_contents>
+<has_text text="humann_Coverage"/>
+<has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
+<has_n_columns n="2"/>
+</assert_contents>
+</output>
+<output name="pathabundance_tsv" ftype="tabular">
+<assert_contents>
+<has_text text="humann_Abundance"/>
+<has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
+<has_n_columns n="2"/>
+</assert_contents>
+</output>
+<output name="log" ftype="txt">
+<assert_contents>
+<has_text text="Total bugs after translated alignment: 1"/>
+<has_text text="unclassified: "/>
+<has_text text="Unaligned reads after translated alignment: "/>
+<has_text text="Total gene families"/>
+</assert_contents>
+</output>
+</test>
+<test expect_num_outputs="4"><!-- Test: raw reads with the bypass_translated_search workflow and a non-default output_basename ("newname"), which the output header assertions below check for; four outputs are expected -->
+<conditional name="in">
+<!-- Input: raw reads; the fixture demo.fastq.gz appears to be gzip-compressed FASTQ (not FASTA), judging by its name -->
+<param name="selector" value="raw"/>
+<param name="input" value="demo.fastq.gz"/>
+</conditional>
+<conditional name="wf">
+<!-- Workflow selector set to bypass_translated_search; the prescreen and nucleotide_search sections use locally cached demo databases -->
+<param name="selector" value="bypass_translated_search"/>
+<section name="prescreen">
+<conditional name="metaphlan_db">
+<param name="selector" value="cached"/>
+<param name="cached_db" value="metaphlan-demo-db-20210421"/>
+</conditional>
+<param name="prescreen_threshold" value="0.01"/>
+</section>
+<section name="nucleotide_search">
+<conditional name="nucleotide_db">
+<param name="selector" value="cached"/>
+<param name="nucleotide_database" value="chocophlan-DEMO-20210421"/>
+</conditional>
+<param name="nucleotide_identity_threshold" value="0"/>
+<param name="nucleotide_subject_coverage_threshold" value="50"/>
+<param name="nucleotide_query_coverage_threshold" value="90"/>
+</section>
+</conditional>
+<section name="g_p_quant">
+<param name="gap_fill" value="true"/>
+<param name="minpath" value="true"/>
+<param name="pathways" value="metacyc"/>
+<param name="xipe" value="false"/>
+<param name="annotation_gene_index" value="3"/>
+</section>
+<section name="out">
+<param name="output_basename" value="newname"/>
+<param name="log_level" value="DEBUG"/>
+<param name="output_format" value="tsv"/>
+<param name="output_max_decimals" value="10"/>
+<param name="remove_column_description_output" value="false"/>
+<param name="remove_statified_output" value="false"/>
+<param name="intermediate_temp" value=""/>
+</section>
+<output name="gene_families_tsv" ftype="tabular">
+<assert_contents>
+<has_text text="newname_Abundance-RPKs"/>
+<has_text text="UniRef90_G1UL42|g__Bacteroides.s__Bacteroides_dorei"/>
+<has_n_columns n="2"/>
+</assert_contents>
+</output>
+<output name="pathcoverage_tsv" ftype="tabular">
+<assert_contents>
+<has_text text="newname_Coverage"/>
+<has_text text="UNMAPPED"/>
+<has_text text="UNINTEGRATED"/>
+<has_n_columns n="2"/>
+</assert_contents>
+</output>
+<output name="pathabundance_tsv" ftype="tabular">
+<assert_contents>
+<has_text text="newname_Abundance"/>
+<has_text text="UNMAPPED"/>
+<has_text text="UNINTEGRATED"/>
+<has_n_columns n="2"/>
+</assert_contents>
+</output>
+<output name="log" ftype="txt">
+<assert_contents>
+<has_text text="Total bugs from nucleotide alignment: 2"/>
+<has_text text="g__Bacteroides.s__Bacteroides_vulgatus: 1195 hits"/>
+<has_text text="g__Bacteroides.s__Bacteroides_dorei: 1260 hits"/>
+<has_text text="Total gene families from nucleotide alignment: 545"/>
+<has_text text="Bypass translated search"/>
+</assert_contents>
+</output>
+</test>
+</tests>
+<help><![CDATA[
+@HELP_HEADER@
+This tool corresponds to the main tool in the HUMAnN pipeline:
+1. Taxonomic prescreen
+Reads are mapped (with MetaPhlAn) to clade-specific marker genes to rapidly identify community species
+2. Pangenome search (nucleotide search)
+Reads are mapped (with Bowtie2) to pangenomes of identified species
+3. Translated search
+Unclassified reads are aligned to a comprehensive and non-redundant protein database
+4. Gene family and pathway quantification
+- Gene abundance estimation
+Mapping results are processed to estimate per-species and community total gene family abundance, weighting by
+- alignment quality
+- gene length
+- gene coverage
+- Per-species and community-level metabolic network reconstruction
+Genes are mapped to metabolic reactions to identify a parsimonious set of pathways that explains each species' observed reactions
+Pathway abundance and coverage are quantified by:
+1. optimizing over alternative subpathways
+2. imputing abundance for conspicuously depleted reactions
+Inputs
+======
+HUMAnN can start from a few different types of input data each in a few different types of formats:
+- Quality-controlled shotgun sequencing reads
+This is the most common starting point: a metagenome (DNA reads) or metatranscriptome (RNA reads)
+- Pre-computed mappings of reads to database sequences
+- Pre-computed (typically gene) abundance tables
+HUMAnN uses 3 reference databases
+Locally cached databases have to be downloaded before using them (using the dedicated tool). Custom databases can also be used after upload.
+Outputs
+=======
+HUMAnN creates three output files:
+- Gene families and their abundance
+- Pathways and their abundance
+- Pathways and their coverage
+Ten intermediate temp output files can also be retrieved.
+]]></help>
+<expand macro="citations"/>
+</tool>

Mercurial > repos > iuc > humann

comparison humann.xml @ 0:65c80ca30373 draft