view metaphlan.xml @ 12:1a037928504c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 671a5fc6d4c02bd3eb830c1886a31ecffd134ceb
author iuc
date Sun, 11 Aug 2024 20:35:53 +0000
parents b6897977d13e
children ef65b083bd0c
line wrap: on
line source

<tool id="metaphlan" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@">
    <description>to profile the composition of microbial communities</description>
    <macros>
        <import>macros.xml</import>
        <!-- Taxonomic-level selector; expanded under both relative-abundance analysis types of this tool -->
        <xml name="tax_lev">
            <conditional name="tax_lev">
                <param argument="--tax_lev" type="select" label="Taxonomic level for the relative abundance output">
                    <option value="a" selected="true">All taxonomic levels</option>
                    <option value="k">Kingdoms only</option>
                    <option value="p">Phyla only</option>
                    <option value="c">Classes only</option>
                    <option value="o">Orders only</option>
                    <option value="f">Families only</option>
                    <option value="g">Genera only</option>
                    <option value="s">Species only</option>
                </param>
                <!-- Only the all-levels choice offers the additional per-level reports -->
                <when value="a">
                    <param name="split_levels" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Generate a report for each taxonomic level?" help="It will be in addition to the default output"/>
                </when>
                <when value="k"/>
                <when value="p"/>
                <when value="c"/>
                <when value="o"/>
                <when value="f"/>
                <when value="g"/>
                <when value="s"/>
            </conditional>
        </xml>
        <!-- Datatypes accepted by the raw read inputs (referenced from their format attributes) -->
        <token name="@FILE_FORMATS@">fastq,fastq.gz,fastq.bz2,fasta,fasta.gz,fasta.bz2</token>
    </macros>
    <expand macro="edam_ontology"/>
    <expand macro="requirements"/>
    <version_command>metaphlan -v</version_command>
    <command detect_errors="aggressive"><![CDATA[
## ---------------------------------------------------------------------------
## 1. Stage raw reads. Compressed inputs are decompressed into the working
##    directory; file_path holds the positional/paired arguments for metaphlan
##    and full_ext the Galaxy datatype extension of the reads.
## ---------------------------------------------------------------------------
#if $inputs.in.selector == "raw"
    #if $inputs.in.raw_in.selector == "single"
        #set full_ext=$inputs.in.raw_in.in.datatype.file_ext
        #if $full_ext.endswith("gz")
            #set $file_path="in"
            zcat '$inputs.in.raw_in.in' > '$file_path' &&
        #else if $full_ext.endswith("bz2")
            #set $file_path="in"
            bzcat '$inputs.in.raw_in.in' > '$file_path' &&
        #else
            #set $file_path="'%s'" % $inputs.in.raw_in.in
        #end if
    #else if $inputs.in.raw_in.selector == "multiple"
        ## All files must share the datatype of the first one; they are passed
        ## to metaphlan as one comma-separated argument.
        #set full_ext=$inputs.in.raw_in.in[0].datatype.file_ext
        #set file_path=""
        #set sep=""
        #for $i, $f in enumerate($inputs.in.raw_in.in)
            #if $f.datatype.file_ext != $full_ext
            echo "Different datatypes for input files" &&
            ## The trailing "&&" keeps "exit 1" a command of its own even when
            ## the template lines are joined into a single command line.
            exit 1 &&
            #end if
            #if $full_ext.endswith("gz")
                #set fp="input_%s" % ($i)
                zcat '$f' > '$fp' &&
            #else if $full_ext.endswith("bz2")
                #set fp="input_%s" % ($i)
                bzcat '$f' > '$fp' &&
            #else
                #set fp=$f
            #end if
            ## Each piece is quoted separately; the shell concatenates the
            ## adjacent quoted strings into "file0,file1,...".
            #set $file_path+="'%s%s'" % ($sep, $fp)
            #set $sep=","
        #end for
    #else if $inputs.in.raw_in.selector == "paired"
        #set full_ext=$inputs.in.raw_in.in_f.datatype.file_ext
        #if $full_ext != $inputs.in.raw_in.in_r.datatype.file_ext
            echo "Different datatypes for input paired-end files" &&
            exit 1 &&
        #end if
        #if $full_ext.endswith("gz")
            zcat '$inputs.in.raw_in.in_f' > 'in_f' &&
            zcat '$inputs.in.raw_in.in_r' > 'in_r' &&
            #set file_path="-1 in_f -2 in_r"
        #else if $full_ext.endswith("bz2")
            bzcat '$inputs.in.raw_in.in_f' > 'in_f' &&
            bzcat '$inputs.in.raw_in.in_r' > 'in_r' &&
            #set file_path="-1 in_f -2 in_r"
        #else
            #set file_path="-1 '%s' -2 '%s'" % ($inputs.in.raw_in.in_f,$inputs.in.raw_in.in_r)
        #end if
    #else if $inputs.in.raw_in.selector == "paired_collection"
        #set full_ext=$inputs.in.raw_in.in.forward.ext
        #if $full_ext != $inputs.in.raw_in.in.reverse.ext
            echo "Different datatypes for input paired-end files" &&
            exit 1 &&
        #end if
        #if $full_ext.endswith("gz")
            zcat '$inputs.in.raw_in.in.forward' > 'in_f' &&
            zcat '$inputs.in.raw_in.in.reverse' > 'in_r' &&
            #set file_path="-1 in_f -2 in_r"
        #else if $full_ext.endswith("bz2")
            bzcat '$inputs.in.raw_in.in.forward' > 'in_f' &&
            bzcat '$inputs.in.raw_in.in.reverse' > 'in_r' &&
            #set file_path="-1 in_f -2 in_r"
        #else
            ## The collection exposes its mates as .forward/.reverse; in_f and
            ## in_r only exist in the "paired" branch above.
            #set file_path="-1 '%s' -2 '%s'" % ($inputs.in.raw_in.in.forward,$inputs.in.raw_in.in.reverse)
        #end if
    #end if

    ## Reduce the datatype extension to the value given to --input_type.
    #if $full_ext.startswith("fastq")
        #set ext='fastq'
    #else if $full_ext.startswith("fasta") and $full_ext.endswith(("gz","bz2"))
        #set ext='fasta'
    #else
        #set ext=$full_ext
    #end if
#end if

## ---------------------------------------------------------------------------
## 2. Database from history: build a Bowtie2 index from the FASTA and convert
##    the JSON metadata into the pickle file placed next to the index.
## ---------------------------------------------------------------------------
#if $inputs.db.db_selector == "history"
mkdir 'ref_db' &&
bowtie2-build --large-index '$inputs.db.bowtie2db' 'ref_db/custom_db' &&
python
    '$__tool_directory__/customizemetadata.py'
    transform_json_to_pkl
    --json '$inputs.db.mpa_pkl'
    --pkl 'ref_db/custom_db.pkl' &&
#end if

## ---------------------------------------------------------------------------
## 3. MetaPhlAn call.
## ---------------------------------------------------------------------------
metaphlan
#if $inputs.in.selector == "raw"
    $file_path
    --input_type '$ext'
    --read_min_len $inputs.in.read_min_len
    --bt2_ps '$inputs.in.mapping.bt2_ps'
    --min_mapq_val $inputs.in.mapping.min_mapq_val
    ## NOTE(review): the raw input formats declared for this tool do not include
    ## sam, so this branch looks unreachable - confirm before relying on it.
    #if $ext == "sam"
        --nreads \$(cat '$file_path' | grep -c -v '^@')
    #end if
#else
    '$inputs.in.in'
    --input_type '$inputs.in.selector'
    #if $inputs.in.selector == "sam"
        ## Count the non-header lines of the SAM file at run time.
        --nreads \$(cat '$inputs.in.in' | grep -c -v '^@')
    #end if
#end if
#if $inputs.db.db_selector == "cached"
    --bowtie2db '$inputs.db.cached_db.fields.path'
    --index '$inputs.db.cached_db.fields.dbkey'
#else
    --bowtie2db 'ref_db/'
    --index 'custom_db'
#end if
    -t '$analysis.analysis_type.t'
#if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats"
    --tax_lev '$analysis.analysis_type.tax_lev.tax_lev'
#else if $analysis.analysis_type.t == "clade_specific_strain_tracker"
    --clade '$analysis.analysis_type.clade'
    #if str($analysis.analysis_type.min_ab) != ''
    --min_ab $analysis.analysis_type.min_ab
    #end if
#else if $analysis.analysis_type.t == "marker_ab_table" and str($analysis.analysis_type.nreads) != ''
    ## Single "$": a doubled one would leave a literal "$" in front of the
    ## number, which the shell would then try to expand.
    --nreads $analysis.analysis_type.nreads
#else if $analysis.analysis_type.t == "marker_pres_table" and str($analysis.analysis_type.pres_th) != ''
    --pres_th $analysis.analysis_type.pres_th
#end if
    --min_cu_len $analysis.min_cu_len
#if str($analysis.min_alignment_len) != ''
    --min_alignment_len $analysis.min_alignment_len
#end if
#if 'add_viruses' in $analysis.organism_profiling
    --add_viruses
#end if
#if 'ignore_eukaryotes' in $analysis.organism_profiling
    --ignore_eukaryotes
#end if
#if 'ignore_bacteria' in $analysis.organism_profiling
    --ignore_bacteria
#end if
#if 'ignore_archaea' in $analysis.organism_profiling
    --ignore_archaea
#end if
    ## Forward the statistical approach chosen in the form; it was previously
    ## declared as an input but never placed on the command line.
    --stat '$analysis.stat'
    --stat_q $analysis.stat_q
    --perc_nonzero $analysis.perc_nonzero
#if $analysis.ignore_markers
    --ignore_markers '$analysis.ignore_markers'
#end if
    $analysis.avoid_disqm
    --sample_id_key '$out.sample_id_key'
    --sample_id '$out.sample_id'
    $out.use_group_representative
    $out.legacy_output
    $out.CAMI_format_output
    $out.unclassified_estimation
    -o '$output_file'
    --bowtie2out 'bowtie2out'
    -s '$sam_output_file'
    --biom '$biom_output_file'
    --nproc \${GALAXY_SLOTS:-4}
#if $viral_analysis.profile_vsc
    $viral_analysis.profile_vsc
    --vsc_out '$vcs_breath_coverage'
    --vsc_breadth $viral_analysis.vsc_breadth
#end if

#if $subsample.selector != "no"
    #if $subsample.selector == "single"
        --subsampling $subsample.subsampling
    #else
        --subsampling_paired $subsample.subsampling_paired
    #end if
    $subsample.mapping_subsampling
    #if $subsample.subsampling_seed
        --subsampling_seed $subsample.subsampling_seed
    #end if
    --subsampling_output subsampled.out
#end if

## The hidden "test" input keeps its default "false" outside of tool tests.
#if $test == "false"
    --offline
#end if

## ---------------------------------------------------------------------------
## 4. Post-processing of the MetaPhlAn outputs.
## ---------------------------------------------------------------------------
#if $inputs.in.selector == "raw"
&&
mv 'bowtie2out' '$bowtie2out'
#end if


#if $analysis.analysis_type.t in ['rel_ab', 'rel_ab_w_read_stats']
    #if $analysis.analysis_type.tax_lev.tax_lev == 'a' and $analysis.analysis_type.tax_lev.split_levels
        &&
        mkdir 'split_levels'
        &&
        python '$__tool_directory__/formatoutput.py'
            split_levels
            --metaphlan_output '$output_file'
            --outdir 'split_levels'
            $out.legacy_output
    #end if
#end if

#if $out.krona_output
&&
python '$__tool_directory__/formatoutput.py'
    format_for_krona
    --metaphlan_output '$output_file'
    --krona_output '$krona_output_file'
#end if
    ]]></command>
    <inputs>
        <section name="inputs" title="Inputs" expanded="true">
            <!-- Read source: raw reads, an externally mapped SAM file, or a bowtie2out file from an earlier run -->
            <conditional name="in">
                <param name="selector" type="select" label="Input(s)">
                    <option value="raw" selected="true">Fasta/FastQ file(s) with microbiota reads</option>
                    <option value="sam">Externally BowTie2-mapped SAM file</option>
                    <option value="bowtie2out">Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run</option>
                </param>
                <when value="raw">
                    <!-- Layout of the raw reads; the command section handles each layout in its own branch -->
                    <conditional name="raw_in">
                        <param name="selector" type="select" label="Fasta/FastQ file(s) with microbiota reads">
                            <option value="single" selected="true">One single-end file</option>
                            <option value="multiple">Multiple single-end files</option>
                            <option value="paired_collection">Paired-end collection</option>
                            <option value="paired">Paired-end files</option>
                        </param>
                        <when value="single">
                            <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ file with microbiota reads"/>
                        </when>
                        <when value="multiple">
                            <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ files with microbiota reads" multiple="true"/>
                        </when>
                        <when value="paired_collection">
                            <param name="in" type="data_collection" format="@FILE_FORMATS@" label="Paired-end Fasta/FastQ file with microbiota reads" collection_type="paired"/>
                        </when>
                        <when value="paired">
                            <param name="in_f" type="data" format="@FILE_FORMATS@" label="Forward paired-end Fasta/FastQ file with microbiota reads"/>
                            <param name="in_r" type="data" format="@FILE_FORMATS@" label="Reverse paired-end Fasta/FastQ file with microbiota reads"/>
                        </when>
                    </conditional>
                    <param argument="--read_min_len" type="integer" value="70" label="Minimum length of the reads to be considered when parsing the input file"/>
                    <!-- Mapping options are only offered (and only passed on the command line) for raw reads -->
                    <section name="mapping" title="Mapping" expanded="true">
                        <param argument="--bt2_ps" type="select" label="Presets options for BowTie2" help="Applied only with FASTA files">
                            <option value="sensitive">Sensitive</option>
                            <option value="very-sensitive" selected="true">Very sensitive</option>
                            <option value="sensitive-local">Sensitive local</option>
                            <option value="very-sensitive-local">Very sensitive local</option>
                        </param>
                        <param argument="--min_mapq_val" type="integer" value="5" label="Minimum mapping quality value (MAPQ)"/>
                    </section>
                </when>
                <when value="sam">
                    <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map microbiota reads"/>
                </when>
                <when value="bowtie2out">
                    <param name="in" type="data" format="tabular" label="Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run" help="File needs to be generated with MetaPhlAn versions &gt;3.0"/>
                </when>
            </conditional>
            <!-- Marker database: an entry of the cached data table, or a FASTA plus JSON metadata taken from the history -->
            <conditional name="db">
                <param name="db_selector" type="select" label="Database with clade-specific marker genes">
                    <option value="cached" selected="true">Locally cached</option>
                    <option value="history">From history</option>
                </param>
                <when value="cached">
                    <param name="cached_db" type="select" label="Cached database with clade-specific marker genes">
                        <options from_data_table="@IDX_DATA_TABLE@">
                            <filter type="static_value" column="4" value="@IDX_VERSION@"/>
                            <validator message="No compatible MetaPhlAn database is available" type="no_options"/>
                        </options>
                    </param>
                </when>
                <when value="history">
                    <!-- The command section builds a Bowtie2 index from the FASTA and converts the JSON to a pickle file before the run -->
                    <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/>
                    <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated to the database with clade-specific marker genes from history"/>
                </when>
            </conditional>
        </section>
        <section name="analysis" title="Analysis" expanded="true">
            <!-- Analysis type; some types expose one extra, type-specific option -->
            <conditional name="analysis_type">
                <param argument="-t" type="select" label="Type of analysis to perform">
                    <option value="rel_ab" selected="true">rel_ab: Profiling a microbiota in terms of relative abundances</option>
                    <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a microbiota in terms of relative abundances and estimate the number of reads coming from each clade</option>
                    <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option>
                    <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option>
                    <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option>
                    <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when &gt; 0.0 and normalized by microbiota size if number of reads is specified)</option>
                    <option value="marker_counts">marker_counts: Non-normalized marker counts (use with extreme caution)</option>
                    <option value="marker_pres_table">marker_pres_table: List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th)</option>
                </param>
                <when value="rel_ab">
                    <expand macro="tax_lev"/>
                </when>
                <when value="rel_ab_w_read_stats">
                    <expand macro="tax_lev"/>
                </when>
                <when value="reads_map"/>
                <when value="clade_profiles"/>
                <when value="clade_specific_strain_tracker">
                    <param argument="--clade" type="text" value="" label="Clade for which to extract list of markers present" help="Markers are also extracted for subclades"/>
                    <param argument="--min_ab" type="float" optional="true" label="The minimum percentage abundance for the clade"/>
                </when>
                <when value="marker_ab_table">
                    <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original microbiota" help="It is used for normalizing the length-normalized counts with the microbiota size as well. No normalization applied if the value is not specified"/>
                </when>
                <when value="marker_counts"/>
                <when value="marker_pres_table">
                    <param argument="--pres_th" type="integer" optional="true" label="Threshold for calling a marker present"/>
                </when>
            </conditional>
            <param argument="--min_cu_len" type="integer" value="2000" label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/>
            <param argument="--min_alignment_len" type="integer" optional="true" label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/>
            <!-- Each selected value is turned into the flag of the same name by the command section -->
            <param name="organism_profiling" type="select" optional="true" label="Organisms to profile" multiple="true">
                <option value="add_viruses" selected="true">Profile viral organisms (add_viruses)</option>
                <option value="ignore_eukaryotes">Ignore eukaryotic organisms (ignore_eukaryotes)</option>
                <option value="ignore_bacteria">Ignore bacteria organisms (ignore_bacteria)</option>
                <option value="ignore_archaea">Ignore archaea organisms (ignore_archaea)</option>
            </param>
            <param argument="--stat" type="select" label="Statistical approach for converting marker abundances into clade abundances">
                <option value="avg_g">avg_g: Clade global (i.e. normalizing all markers together) average (avg_g)</option>
                <option value="avg_l">avg_l: Average of length-normalized marker counts</option>
                <option value="tavg_g" selected="true">tavg_g: Truncated clade global average at --stat_q quantile</option>
                <option value="tavg_l">tavg_l: Truncated average of length-normalized marker counts (at --stat_q)</option>
                <option value="wavg_g">wavg_g: Winsorized clade global average (at --stat_q)</option>
                <option value="wavg_l">wavg_l: Winsorized average of length-normalized marker counts (at --stat_q)</option>
                <option value="med">med: Median of length-normalized marker counts</option>
            </param>
            <param argument="--stat_q" type="float" value="0.2" label="Quantile value for the robust average"/>
            <param argument="--perc_nonzero" type="float" value="0.33" label="Percentage of markers with a non zero relative abundance for misidentify a species"/>
            <param argument="--ignore_markers" type="data" format="txt,tabular" optional="true" label="File containing a list of markers to ignore" help="One marker per line"/>
            <!-- NOTE(review): checked by default, so the avoid_disqm flag is passed unless the user unticks it, while the help text recommends keeping the disambiguation; confirm the intended default -->
            <param argument="--avoid_disqm" type="boolean" truevalue="--avoid_disqm" falsevalue="" checked="true" label="Deactivate the procedure of disambiguating the quasi-markers based on the marker abundance pattern found in the sample?" help="It is generally recommended to keep the disambiguation procedure in order to minimize false positives"/>
        </section>
        <!-- Optional read subsampling; the number of reads is given either for single or for paired reads -->
        <conditional name="subsample">
            <param name="selector" type="select" label="Subsample" help="Subsampling only works for fastq input">
                <option value="no">No</option>
                <option value="single">Yes: specify number of reads</option>
                <option value="paired">Yes: specify number of paired reads</option>
            </param>
            <when value="no"/>
            <when value="single">
                <param argument="--subsampling" type="integer" min="1" value="" label="Subsample reads" help="Specify the number of reads to be considered"/>
                <expand macro="subsample_common"/>
            </when>
            <when value="paired">
                <param argument="--subsampling_paired" type="integer" min="1" value="" label="Subsample reads" help="Specify the number of paired reads to be considered. For N there will be floor(N/2) reads selected from the forward and reverse reads each."/>
                <expand macro="subsample_common"/>
            </when>
        </conditional>
        <!-- Viral profiling switch: the option value is the command-line flag itself, the empty value disables it -->
        <conditional name="viral_analysis">
            <param argument="--profile_vsc" type="select" label="Profile Viruses with VSCs approach">
                <option value="--profile_vsc">Yes (requires FASTQ input and reference data with VSG fasta)</option>
                <option value="" selected="true">No</option>
            </param>
            <when value="--profile_vsc">
                <param argument="--vsc_breadth" type="float" min="0" max="1" value="0.75" label="Minimum Breadth of Coverage" help="Minimum coverage (fraction) for a Viral Group to be reported."/>
            </when>
            <when value=""/>
        </conditional>
        <!-- Output options: the boolean parameters below emit their flag as truevalue and nothing when unchecked -->
        <section name="out" title="Outputs" expanded="true">
            <param argument="--sample_id_key" type="text" value="SampleID" label="Sample ID key for this analysis"/>
            <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/>
            <param argument="--use_group_representative" type="boolean" truevalue="--use_group_representative" falsevalue="" checked="false" label="Use a species as representative for species groups?"/>
            <param argument="--legacy-output" type="boolean" truevalue="--legacy-output" falsevalue="" checked="false" label="Old MetaPhlAn2 two columns output?"/>
            <param argument="--CAMI_format_output" type="boolean" truevalue="--CAMI_format_output" falsevalue="" checked="false" label="Report the profiling using the CAMI output format?"/>
            <param argument="--unclassified_estimation" type="boolean" truevalue="--unclassified_estimation" falsevalue="" checked="false" label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/>
            <!-- Not a MetaPhlAn flag: when checked, the command section runs formatoutput.py to write the Krona table -->
            <param name="krona_output" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output for Krona?"/>
        </section>
        <!-- Hidden switch: with the default value "false" the command section runs metaphlan in offline mode.
             Enabling this in tests will allow metaphlan to download reference data (we do this only with the smallish TOY DB) -->
        <param name="test" type="hidden" value="false"/>
    </inputs>
    <outputs>
        <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances"/>
        <!-- The Bowtie2 and SAM outputs are only created when raw reads are given as input -->
        <data name="bowtie2out" format="tabular" label="${tool.name} on ${on_string}: Bowtie2 output">
            <filter>inputs['in']['selector'] == "raw"</filter>
        </data>
        <data name="sam_output_file" format="sam" label="${tool.name} on ${on_string}: SAM file">
            <filter>inputs['in']['selector'] == "raw"</filter>
        </data>
        <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file"/>
        <!-- One element per file found in the split_levels directory written by the command section -->
        <collection name="levels" type="list" label="${tool.name} on ${on_string}: Predicted taxon relative abundances at each taxonomic level">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)" directory="split_levels/" format="tabular"/>
            <filter>analysis['analysis_type']['t'] in ['rel_ab', 'rel_ab_w_read_stats'] and analysis['analysis_type']['tax_lev']['tax_lev'] == "a" and analysis['analysis_type']['tax_lev']['split_levels']</filter>
        </collection>
        <data name="krona_output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances for Krona">
            <filter>out['krona_output']</filter>
        </data>
        <!-- The output name is referenced as written by the command section; keep both in sync when renaming -->
        <data name="vcs_breath_coverage" format="tabular" label="${tool.name} on ${on_string}: VSCs breadth-of-coverage">
            <filter>viral_analysis['profile_vsc']</filter>
        </data>
        <!-- Subsampled reads: a single file for single-end subsampling, a forward/reverse pair otherwise -->
        <data name="subsample_single" format="fastqsanger" from_work_dir="subsampled.out" label="${tool.name} on ${on_string}: Subsampled reads">
            <filter>subsample['selector'] == 'single'</filter>
        </data>
        <collection name="subsample_paired" type="paired" label="${tool.name} on ${on_string}: Subsampled paired reads">
            <data name="forward" format="fastqsanger" from_work_dir="subsampled.R1.out"/>
            <data name="reverse" format="fastqsanger" from_work_dir="subsampled.R2.out"/>
            <filter>subsample['selector'] == 'paired'</filter>
        </collection>
    </outputs>
    <tests>
        <!-- Single uncompressed FASTA file (no_taxon_input.fasta), Cached db, split levels and Krona output enabled -->
        <test expect_num_outputs="6">
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <param name="selector" value="single"/>
                        <param name="in" value="no_taxon_input.fasta"/>
                    </conditional>
                    <param name="read_min_len" value="70"/>
                    <section name="mapping">
                        <param name="bt2_ps" value="sensitive"/>
                        <param name="min_mapq_val" value="5"/>
                    </section>
                </conditional>
                <conditional name="db">
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="rel_ab"/>
                    <conditional name="tax_lev">
                        <param name="tax_lev" value="a"/>
                        <param name="split_levels" value="true"/>
                    </conditional>
                </conditional>
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unclassified_estimation" value="false"/>
                <param name="krona_output" value="true"/>
            </section>
            <!-- The profile is expected to report UNCLASSIFIED and no bacterial taxa appear in the BIOM or Krona outputs -->
            <output name="output_file" ftype="tabular">
                <assert_contents>
                    <has_text text="UNCLASSIFIED"/>
                </assert_contents>
            </output>
            <output name="bowtie2out" ftype="tabular">
                <assert_contents>
                    <not_has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
            <output name="sam_output_file" ftype="sam">
                <assert_contents>
                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1">
                <assert_contents>
                    <not_has_text text="k__Bacteria"/>
                    <not_has_text text="p__Actinobacteria"/>
                </assert_contents>
            </output>
            <!-- Every per-level report is expected to consist of a single line -->
            <output_collection name="levels" type="list">
                <element name="all" ftype="tabular">
                    <assert_contents>
                        <has_text text="class"/>
                        <has_n_columns n="17"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="class" ftype="tabular">
                    <assert_contents>
                        <has_text text="class_id"/>
                        <not_has_text text="phylum_id"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="family" ftype="tabular">
                    <assert_contents>
                        <has_text text="family_id"/>
                        <not_has_text text="order"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="genus" ftype="tabular">
                    <assert_contents>
                        <has_text text="genus_id"/>
                        <not_has_text text="family"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="kingdom" ftype="tabular">
                    <assert_contents>
                        <has_text text="kingdom_id"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="order" ftype="tabular">
                    <assert_contents>
                        <has_text text="order_id"/>
                        <not_has_text text="class_id"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="phylum" ftype="tabular">
                    <assert_contents>
                        <has_text text="phylum_id"/>
                        <not_has_text text="kingdom_id"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="species" ftype="tabular">
                    <assert_contents>
                        <has_text text="species_id"/>
                        <not_has_text text="genus"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="strains" ftype="tabular">
                    <assert_contents>
                        <has_text text="strains_id"/>
                        <not_has_text text="species_id"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="krona_output_file" ftype="tabular">
                <assert_contents>
                    <not_has_text text="k__Bacteria"/>
                    <has_n_lines n="1" delta="1"/>
                    <has_size value="1" delta="1"/>
                </assert_contents>
            </output>
            <!-- stderr must not mention "Downloading" -->
            <assert_stderr>
                <has_text text="Downloading" negate="true"/>
            </assert_stderr>
        </test>
        <!-- Single GZ file, Cached db -->
        <test expect_num_outputs="6">
            <!-- Inputs: one gzipped FASTA file, profiled against the cached test database -->
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <param name="selector" value="single"/>
                        <param name="in" value="SRS014464-Anterior_nares.fasta.gz"/>
                    </conditional>
                    <param name="read_min_len" value="70"/>
                    <section name="mapping">
                        <param name="bt2_ps" value="sensitive"/>
                        <param name="min_mapq_val" value="5"/>
                    </section>
                </conditional>
                <conditional name="db">
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <!-- Analysis: relative abundance for all taxonomic levels, plus one report per level (split_levels) -->
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="rel_ab"/>
                    <conditional name="tax_lev">
                        <param name="tax_lev" value="a"/>
                        <param name="split_levels" value="true"/>
                    </conditional>
                </conditional>
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <!-- Output options: default (non-legacy) table format with the Krona output switched on -->
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unclassified_estimation" value="false"/>
                <param name="krona_output" value="true"/>
            </section>
            <!-- Main datasets: compared by size against the reference files and spot-checked for known content -->
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                </assert_contents>
            </output>
            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                    <has_text text="37637__U2I1U8__N579_01580"/>
                </assert_contents>
            </output>
            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size">
                <assert_contents>
                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
            <!-- Per-level reports: each element must carry its own level header, must not carry the header text of the level above it, and must have 3 columns (17 for "all") -->
            <output_collection name="levels" type="list">
                <element name="all" ftype="tabular">
                    <assert_contents>
                        <has_text text="Gammaproteobacteria"/>
                        <has_text text="Corynebacterium accolens"/>
                        <has_n_columns n="17"/>
                    </assert_contents>
                </element>
                <element name="class" ftype="tabular">
                    <assert_contents>
                        <has_text text="class_id"/>
                        <not_has_text text="phylum_id"/>
                        <has_text text="Actinobacteria"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
                <element name="family" ftype="tabular">
                    <assert_contents>
                        <has_text text="family_id"/>
                        <not_has_text text="order"/>
                        <has_text text="Propionibacteriaceae"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
                <element name="genus" ftype="tabular">
                    <assert_contents>
                        <has_text text="genus_id"/>
                        <not_has_text text="family"/>
                        <has_text text="Cutibacterium"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
                <element name="kingdom" ftype="tabular">
                    <assert_contents>
                        <has_text text="kingdom_id"/>
                        <has_text text="Bacteria"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
                <element name="order" ftype="tabular">
                    <assert_contents>
                        <has_text text="order_id"/>
                        <not_has_text text="class_id"/>
                        <has_text text="Propionibacteriales"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
                <element name="phylum" ftype="tabular">
                    <assert_contents>
                        <has_text text="phylum_id"/>
                        <not_has_text text="kingdom_id"/>
                        <has_text text="Firmicutes"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
                <element name="species" ftype="tabular">
                    <assert_contents>
                        <has_text text="species_id"/>
                        <not_has_text text="genus"/>
                        <has_text text="Corynebacterium accolens"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
                <element name="strains" ftype="tabular">
                    <assert_contents>
                        <has_text text="strains_id"/>
                        <not_has_text text="species_id"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
            </output_collection>
            <!-- Krona table: must not contain the k__ prefixed clade name -->
            <output name="krona_output_file" ftype="tabular">
                <assert_contents>
                    <not_has_text text="k__Bacteria"/>
                    <has_text text="Corynebacterium accolens"/>
                    <has_n_columns n="9"/>
                </assert_contents>
            </output>
            <!-- The run must not report a database download on stderr -->
            <assert_stderr>
                <not_has_text text="Downloading"/>
            </assert_stderr>
        </test>
        <!-- Multiple GZ files, Local db (from history) -->
        <test expect_num_outputs="4">
            <!-- Inputs: the same gzipped FASTA file supplied twice, with the database taken from history datasets -->
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <param name="selector" value="multiple"/>
                        <param name="in" value="SRS014464-Anterior_nares.fasta.gz,SRS014464-Anterior_nares.fasta.gz"/>
                    </conditional>
                    <param name="read_min_len" value="70"/>
                    <section name="mapping">
                        <param name="bt2_ps" value="sensitive"/>
                        <param name="min_mapq_val" value="5"/>
                    </section>
                </conditional>
                <conditional name="db">
                    <param name="db_selector" value="history"/>
                    <param name="bowtie2db" value="test-db.fasta"/>
                    <param name="mpa_pkl" value="test-db.json"/>
                </conditional>
            </section>
            <!-- Analysis: relative abundance for all taxonomic levels, without per-level reports -->
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="rel_ab"/>
                    <conditional name="tax_lev">
                        <param name="tax_lev" value="a"/>
                        <param name="split_levels" value="false"/>
                    </conditional>
                </conditional>
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <!-- Output options: every optional output format is switched off -->
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unclassified_estimation" value="false"/>
                <param name="krona_output" value="false"/>
            </section>
            <!-- Abundance table: size comparison plus checks for the column headers -->
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                    <has_text text="relative_abundance"/>
                    <has_text text="NCBI_tax_id"/>
                    <has_text text="clade_name"/>
                </assert_contents>
            </output>
            <!-- Mapping outputs are compared against the dedicated two-inputs reference files -->
            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                    <has_text text="37637__U2I1U8__N579_01580"/>
                </assert_contents>
            </output>
            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares-two-inputs.sam" compare="sim_size">
                <assert_contents>
                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
            <!-- The run must not report a database download on stderr -->
            <assert_stderr>
                <not_has_text text="Downloading"/>
            </assert_stderr>
        </test>
        <!-- Paired GZ files, Cached db (note: subsample_paired and its forward and reverse reads are counted separately in expect_num_outputs, because they are all statically defined) -->
        <test expect_num_outputs="7">
            <!-- Inputs: forward and reverse reads given as two separate datasets (the same gzipped FASTQ file for both) -->
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <param name="selector" value="paired"/>
                        <param name="in_f" value="SRS014464-Anterior_nares.fastq.gz"/>
                        <param name="in_r" value="SRS014464-Anterior_nares.fastq.gz"/>
                    </conditional>
                    <param name="read_min_len" value="70"/>
                    <section name="mapping">
                        <param name="bt2_ps" value="sensitive"/>
                        <param name="min_mapq_val" value="5"/>
                    </section>
                </conditional>
                <conditional name="db">
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <!-- Analysis: relative abundance for all taxonomic levels, without per-level reports -->
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="rel_ab"/>
                    <conditional name="tax_lev">
                        <param name="tax_lev" value="a"/>
                        <param name="split_levels" value="false"/>
                    </conditional>
                </conditional>
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <!-- Paired subsampling with a fixed seed, so the subsampled read files checked below are reproducible -->
            <conditional name="subsample">
                <param name="selector" value="paired"/>
                <param name="subsampling_paired" value="20257"/>
                <param name="subsampling_seed" value="42"/>
            </conditional>
            <!-- Output options: every optional output format is switched off -->
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unclassified_estimation" value="false"/>
                <param name="krona_output" value="false"/>
            </section>
            <!-- Abundance table: size comparison plus checks for the column headers -->
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                    <has_text text="relative_abundance"/>
                    <has_text text="NCBI_tax_id"/>
                    <has_text text="clade_name"/>
                </assert_contents>
            </output>
            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="HWI-EAS109_102883399:3:107:9938:7093/1"/>
                    <has_text text="90240__A0A378QWM4__NCTC12877_00123"/>
                </assert_contents>
            </output>
            <!-- SAM output has no reference file here; it is checked by a size window and a header entry -->
            <output name="sam_output_file" ftype="sam">
                <assert_contents>
                    <has_size min="52400" max="52600"/>
                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                </assert_contents>
            </output>
            <!-- Subsampled reads: both mates must contain the same number of lines starting with "@" -->
            <output_collection name="subsample_paired" type="paired">
                <element name="forward">
                    <assert_contents>
                        <has_line_matching expression="^@.*" n="10128"/>
                    </assert_contents>
                </element>
                <element name="reverse">
                    <assert_contents>
                        <has_line_matching expression="^@.*" n="10128"/>
                    </assert_contents>
                </element>
            </output_collection>
            <!-- The run must not report a database download on stderr -->
            <assert_stderr>
                <not_has_text text="Downloading"/>
            </assert_stderr>
        </test>
        <!-- Paired GZ files as collection, Cached db (note: subsample_paired and its forward and reverse reads are counted separately in expect_num_outputs, because they are all statically defined) -->
        <test expect_num_outputs="7">
            <!-- Inputs: forward and reverse reads given as one paired collection (the same gzipped FASTQ file for both mates) -->
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <param name="selector" value="paired_collection"/>
                        <param name="in">
                            <collection type="paired" name="pair">
                                <element name="forward" value="SRS014464-Anterior_nares.fastq.gz"/>
                                <element name="reverse" value="SRS014464-Anterior_nares.fastq.gz"/>
                            </collection>
                        </param>
                    </conditional>
                    <param name="read_min_len" value="70"/>
                    <section name="mapping">
                        <param name="bt2_ps" value="sensitive"/>
                        <param name="min_mapq_val" value="5"/>
                    </section>
                </conditional>
                <conditional name="db">
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <!-- Analysis: relative abundance for all taxonomic levels, without per-level reports -->
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="rel_ab"/>
                    <conditional name="tax_lev">
                        <param name="tax_lev" value="a"/>
                        <param name="split_levels" value="false"/>
                    </conditional>
                </conditional>
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <!-- Paired subsampling with a fixed seed, so the subsampled read files checked below are reproducible -->
            <conditional name="subsample">
                <param name="selector" value="paired"/>
                <param name="subsampling_paired" value="20257"/>
                <param name="subsampling_seed" value="42"/>
            </conditional>
            <!-- Output options: every optional output format is switched off -->
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unclassified_estimation" value="false"/>
                <param name="krona_output" value="false"/>
            </section>
            <!-- Abundance table: size comparison plus checks for the column headers -->
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                    <has_text text="relative_abundance"/>
                    <has_text text="NCBI_tax_id"/>
                    <has_text text="clade_name"/>
                </assert_contents>
            </output>
            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="HWI-EAS109_102883399:3:107:9938:7093/1"/>
                    <has_text text="90240__A0A378QWM4__NCTC12877_00123"/>
                </assert_contents>
            </output>
            <!-- SAM output has no reference file here; it is checked by a size window and a header entry -->
            <output name="sam_output_file" ftype="sam">
                <assert_contents>
                    <has_size min="52400" max="52600"/>
                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                </assert_contents>
            </output>
            <!-- Subsampled reads: both mates must contain the same number of lines starting with "@" -->
            <output_collection name="subsample_paired" type="paired">
                <element name="forward">
                    <assert_contents>
                        <has_line_matching expression="^@.*" n="10128"/>
                    </assert_contents>
                </element>
                <element name="reverse">
                    <assert_contents>
                        <has_line_matching expression="^@.*" n="10128"/>
                    </assert_contents>
                </element>
            </output_collection>
            <!-- The run must not report a database download on stderr -->
            <assert_stderr>
                <not_has_text text="Downloading"/>
            </assert_stderr>
        </test>
        <!-- SAM, cached DB -->
        <test expect_num_outputs="2">
            <!-- Inputs: an existing SAM file, profiled against the cached test database -->
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="sam"/>
                    <param name="in" value="SRS014464-Anterior_nares.sam"/>
                </conditional>
                <conditional name="db">
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <!-- Analysis: relative abundance for all taxonomic levels, without per-level reports -->
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="rel_ab"/>
                    <conditional name="tax_lev">
                        <param name="tax_lev" value="a"/>
                        <param name="split_levels" value="false"/>
                    </conditional>
                </conditional>
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <!-- Output options: every optional output format is switched off -->
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unclassified_estimation" value="false"/>
                <param name="krona_output" value="false"/>
            </section>
            <!-- Only the abundance table and the BIOM file are asserted for this input type -->
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                    <has_text text="relative_abundance"/>
                    <has_text text="NCBI_tax_id"/>
                    <has_text text="clade_name"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
            <!-- The run must not report a database download on stderr -->
            <assert_stderr>
                <not_has_text text="Downloading"/>
            </assert_stderr>
        </test>
        <!-- bowtie2out, cached DB -->
        <test expect_num_outputs="2">
            <!-- Inputs: an existing bowtie2out table, profiled against the cached test database.
                 All input parameters live inside the "inputs" section, as in the other tests. -->
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="bowtie2out"/>
                    <param name="in" value="SRS014464-Anterior_nares-bowtie2out.tabular"/>
                </conditional>
                <conditional name="db">
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <!-- Analysis: relative abundance for all taxonomic levels, without per-level reports -->
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="rel_ab"/>
                    <conditional name="tax_lev">
                        <param name="tax_lev" value="a"/>
                        <param name="split_levels" value="false"/>
                    </conditional>
                </conditional>
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <!-- Output options: every optional output format is switched off -->
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unclassified_estimation" value="false"/>
                <param name="krona_output" value="false"/>
            </section>
            <!-- Only the abundance table and the BIOM file are asserted for this input type -->
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                    <has_text text="relative_abundance"/>
                    <has_text text="NCBI_tax_id"/>
                    <has_text text="clade_name"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
            <!-- The run must not report a database download on stderr -->
            <assert_stderr>
                <has_text text="Downloading" negate="true"/>
            </assert_stderr>
        </test>
        <!-- Single FASTA file, Cached db -->
        <test expect_num_outputs="6">
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <param name="selector" value="single"/>
                        <param name="in" value="SRS014464-Anterior_nares.fasta"/>
                    </conditional>
                    <param name="read_min_len" value="70"/>
                    <section name="mapping">
                        <param name="bt2_ps" value="sensitive"/>
                        <param name="min_mapq_val" value="5"/>
                    </section>
                </conditional>
                <conditional name="db">
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="rel_ab"/>
                    <conditional name="tax_lev">
                        <param name="tax_lev" value="a"/>
                        <param name="split_levels" value="true"/>
                    </conditional>
                </conditional>
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="ignore_markers" value="marker.txt"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="true"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unclassified_estimation" value="false"/>
                <param name="krona_output" value="true"/>
            </section>
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                    <has_text text="SampleID"/>
                    <has_text text="Metaphlan_Analysis"/>
                </assert_contents>
            </output>
            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                    <has_text text="37637__U2I1U8__N579_01580"/>
                </assert_contents>
            </output>
            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size">
                <assert_contents>
                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
            <output_collection name="levels" type="list">
                <element name="all" ftype="tabular">
                    <assert_contents>
                        <has_text text="Gammaproteobacteria"/>
                        <has_text text="Corynebacterium accolens"/>
                        <has_n_columns n="9"/>
                    </assert_contents>
                </element>
                <element name="class" ftype="tabular">
                    <assert_contents>
                        <has_text text="class"/>
                        <has_text text="Actinobacteria"/>
                        <has_n_columns n="2"/>
                    </assert_contents>
                </element>
                <element name="family" ftype="tabular">
                    <assert_contents>
                        <has_text text="family"/>
                        <has_text text="Propionibacteriaceae"/>
                        <has_n_columns n="2"/>
                    </assert_contents>
                </element>
                <element name="genus" ftype="tabular">
                    <assert_contents>
                        <has_text text="genus"/>
                        <has_text text="Cutibacterium"/>
                        <has_n_columns n="2"/>
                    </assert_contents>
                </element>
                <element name="kingdom" ftype="tabular">
                    <assert_contents>
                        <has_text text="kingdom"/>
                        <has_text text="Bacteria"/>
                        <has_n_columns n="2"/>
                    </assert_contents>
                </element>
                <element name="order" ftype="tabular">
                    <assert_contents>
                        <has_text text="order"/>
                        <has_text text="Propionibacteriales"/>
                        <has_n_columns n="2"/>
                    </assert_contents>
                </element>
                <element name="phylum" ftype="tabular">
                    <assert_contents>
                        <has_text text="phylum"/>
                        <has_text text="Firmicutes"/>
                        <has_n_columns n="2"/>
                    </assert_contents>
                </element>
                <element name="species" ftype="tabular">
                    <assert_contents>
                        <has_text text="species"/>
                        <has_text text="Corynebacterium accolens"/>
                        <has_n_columns n="2"/>
                    </assert_contents>
                </element>
                <element name="strains" ftype="tabular">
                    <assert_contents>
                        <has_text text="strains"/>
                        <has_n_columns n="2"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="krona_output_file" ftype="tabular">
                <assert_contents>
                    <not_has_text text="k__Bacteria"/>
                    <has_text text="Corynebacterium accolens"/>
                    <has_n_columns n="9"/>
                </assert_contents>
            </output>
            <assert_stderr>
                <has_text text="Downloading" negate="true"/>
            </assert_stderr>
        </test>
        <!-- Check a non-default analysis mode (marker_ab_table)
             together with viral analysis and read subsampling -->
        <test expect_num_outputs="6">
            <!-- Inputs: one raw-reads file (gzipped FASTQ, raw_in selector "single") profiled
                 against the cached mpa_vJan21_TOY_CHOCOPhlAnSGB database. Six outputs are
                 expected in total; only three of them carry assertions below. -->
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <param name="selector" value="single"/>
                        <param name="in" value="SRS014464-Anterior_nares.fastq.gz"/>
                    </conditional>
                </conditional>
                <conditional name="db">
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="mpa_vJan21_TOY_CHOCOPhlAnSGB"/>
                </conditional>
            </section>
            <!-- Analysis type under test: marker_ab_table. -->
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="marker_ab_table"/>
                </conditional>
            </section>
            <!-- Turn on viral profiling (profile_vsc) and set vsc_out; both are checked on the
                 generated command line further down. -->
            <conditional name="viral_analysis">
                <param name="profile_vsc" value="--profile_vsc"/>
                <param name="vsc_out" value="true"/>
            </conditional>
            <!-- Subsample the single input with subsampling=10000 and a fixed seed of 42. -->
            <conditional name="subsample">
                <param name="selector" value="single"/>
                <param name="subsampling" value="10000"/>
                <param name="subsampling_seed" value="42"/>
            </conditional>
            <!-- test=true: its effect is covered by the stderr assertions at the end of this test. -->
            <param name="test" value="true"/>
            <!-- NOTE(review): the expected file is the same one the preceding test uses. With
                 compare="sim_size" the files are compared by size, so the has_text assertions
                 carry the actual content check. -->
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="SGB7017__MKDPKOFL_00679"/>
                    <has_text text="SampleID"/>
                    <has_text text="Metaphlan_Analysis"/>
                </assert_contents>
            </output>
            <!-- Expects exactly 10000 occurrences of "@" in the subsampled reads, matching the
                 subsampling value set above. -->
            <output name="subsample_single">
                <assert_contents>
                    <has_text text="@" n="10000"/>
                </assert_contents>
            </output>
            <!-- The reference data is empty, so this output is expected to be empty (size 0).
                 NOTE(review): the name "vcs_breath_coverage" presumably matches the declaration
                 in the outputs section; confirm there before changing the spelling. -->
            <output name="vcs_breath_coverage" ftype="tabular">
                <assert_contents>
                    <has_size size="0"/>
                </assert_contents>
            </output>
            <!-- The generated command line must carry the viral profiling options. vsc_breadth is
                 not set in this test, so 0.75 is presumably the tool default; TODO confirm. -->
            <assert_command>
                <has_text text="--profile_vsc"/>
                <has_text text="--vsc_breadth 0.75"/>
                <has_text text="--vsc_out"/>
            </assert_command>
            <assert_stderr>
                <has_text text="Downloading"/>
                <!-- Because of test=true and the absence of the TOY reference DB, MetaPhlAn
                     downloads roughly 10 MB of data, hence the "Downloading" message expected above. -->
                <has_text text="No reads aligning to VSC markers"/>
            </assert_stderr>
        </test>
    </tests>
    <help><![CDATA[
What it does
============

MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria, 
Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) at species-level. 

MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes 
(~99,500 bacterial and archaeal and ~500 eukaryotic), allowing:

- unambiguous taxonomic assignments;
- accurate estimation of organismal relative abundance;
- species-level resolution for bacteria, archaea, eukaryotes and viruses;
- strain identification and tracking;
- orders of magnitude speedups compared to existing methods;
- microbiota strain-level population genomics.

MetaPhlAn clade-abundance estimation
------------------------------------

The basic usage of MetaPhlAn consists in the identification of the clades (from phyla to species and 
strains in particular cases) present in the microbiota obtained from a microbiome sample and their 
relative abundance.

Marker level analysis
---------------------

MetaPhlAn introduces the capability of characterizing organisms at the strain level using
non-aggregated marker information. This capability comes in several slightly different flavours and
is a way to perform strain tracking and comparison across multiple samples.

Usually, MetaPhlAn is first run with default parameters for the type of analysis to profile the
species present in the community, and then a strain-level profiling can be performed to zoom-in on 
specific species of interest. This operation can be performed quickly as it exploits the bowtie2out 
intermediate file saved during the execution of the default analysis type.

Inputs
======

MetaPhlAn takes as input either:

- one or several sequence files in FASTA or FASTQ format (compressed or not)
- a SAM file produced by Bowtie2
- an intermediary mapping file of the microbiota generated by a previous MetaPhlAn run

It also needs the reference database, which can be locally installed or customized using the dedicated tools.

Outputs
=======

The main output is a tab-separated file with the predicted taxon relative abundances.

It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs.


More help and use cases
=======================

To get more information about MetaPhlAn usage and use cases, please refer to the `Metaphlan documentation`_.

.. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-4#Basic-Usage

    ]]></help>
    <expand macro="citations"/>
</tool>