<!-- Mercurial > repos > iuc > metaphlan -->

<tool id="metaphlan" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>to profile the composition of microbial communities</description>
    <macros>
        <import>macros.xml</import>
        <!-- Taxonomic-level selector; expanded by both relative-abundance analysis types below. -->
        <xml name="tax_lev">
            <conditional name="tax_lev">
                <param argument="--tax_lev"
                       type="select"
                       label="Taxonomic level for the relative abundance output">
                    <option value="a" selected="true">All taxonomic levels</option>
                    <option value="k">Kingdoms only</option>
                    <option value="p">Phyla only</option>
                    <option value="c">Classes only</option>
                    <option value="o">Orders only</option>
                    <option value="f">Families only</option>
                    <option value="g">Genera only</option>
                    <option value="s">Species only</option>
                </param>
                <!-- The per-level split is only offered when all levels are reported. -->
                <when value="a">
                    <param name="split_levels"
                           type="boolean"
                           checked="false"
                           truevalue="true"
                           falsevalue="false"
                           label="Generate a report for each taxonomic level?"
                           help="It will be in addition to the default output"/>
                </when>
                <!-- Single-level choices take no additional parameter. -->
                <when value="k"/>
                <when value="p"/>
                <when value="c"/>
                <when value="o"/>
                <when value="f"/>
                <when value="g"/>
                <when value="s"/>
            </conditional>
        </xml>
        <!-- Datatypes accepted by every raw-read input parameter of this tool. -->
        <token name="@FILE_FORMATS@">fastq,fastq.gz,fastq.bz2,fasta,fasta.gz,fasta.bz2</token>
    </macros>
    <!-- These two macros are not defined in this file; presumably they come from the imported macros.xml (verify there). -->
    <expand macro="edam_ontology"/>
    <expand macro="requirements"/>
    <!-- Command used to report the version of the wrapped MetaPhlAn executable. -->
    <version_command>metaphlan -v</version_command>
    <command detect_errors="aggressive"><![CDATA[
## Overview of the generated shell command:
##   1. raw reads: decompress gz/bz2 inputs into the working directory, build the
##      (comma-separated) list of paths and derive the value for --input_type
##   2. database from history: build a Bowtie2 index and convert the JSON metadata to a pickle
##   3. run metaphlan with the selected inputs, database, analysis and output options
##   4. optional post-processing: per-level split of the profile and Krona formatting
#if $inputs.in.selector == "raw"
    #if $inputs.in.raw_in.selector == "single"
        #set full_ext=$inputs.in.raw_in.in.datatype.file_ext
        #if $full_ext.endswith("gz")
            #set $file_path="in"
zcat '$inputs.in.raw_in.in' > '$file_path'
&&
        #else if $full_ext.endswith("bz2")
            #set $file_path="in"
bzcat '$inputs.in.raw_in.in' > '$file_path'
&&
        #else
            #set $file_path=$inputs.in.raw_in.in
        #end if
    #else if $inputs.in.raw_in.selector == "multiple"
        ## All files must share the datatype of the first one
        #set full_ext=$inputs.in.raw_in.in[0].datatype.file_ext
        #set file_path=""
        #set sep=""
        #for $i, $f in enumerate($inputs.in.raw_in.in)
            #if $f.datatype.file_ext != $full_ext
## "exit 1" is chained with "&&" so that the following command is not parsed as extra arguments of exit
echo "Different datatypes for input files" >&2
&&
exit 1
&&
            #end if
            #if $full_ext.endswith("gz")
                #set fp="input_%s" % ($i)
zcat '$f' > '$fp'
&&
            #else if $full_ext.endswith("bz2")
                #set fp="input_%s" % ($i)
bzcat '$f' > '$fp'
&&
            #else
                #set fp=$f
            #end if
            #set $file_path+="%s%s" % ($sep, $fp)
            #set $sep=","
        #end for
    #else if $inputs.in.raw_in.selector == "paired"
        #set full_ext=$inputs.in.raw_in.in_f.datatype.file_ext
        #if $full_ext != $inputs.in.raw_in.in_r.datatype.file_ext
## "exit 1" is chained with "&&" so that the following command is not parsed as extra arguments of exit
echo "Different datatypes for input paired-end files" >&2
&&
exit 1
&&
        #end if
        #if $full_ext.endswith("gz")
zcat '$inputs.in.raw_in.in_f' > 'in_f'
&&
zcat '$inputs.in.raw_in.in_r' > 'in_r'
&&
            #set file_path="in_f,in_r"
        #else if $full_ext.endswith("bz2")
bzcat '$inputs.in.raw_in.in_f' > 'in_f'
&&
bzcat '$inputs.in.raw_in.in_r' > 'in_r'
&&
            #set file_path="in_f,in_r"
        #else
            #set file_path="%s,%s" % ($inputs.in.raw_in.in_f,$inputs.in.raw_in.in_r)
        #end if
    #end if

    ## Strip the compression suffix: the files handed to metaphlan are already decompressed
    #if $full_ext.startswith("fastq")
        #set ext='fastq'
    #else if $full_ext.startswith("fasta") and $full_ext.endswith(("gz","bz2"))
        #set ext='fasta'
    #else
        #set ext=$full_ext
    #end if
#end if

#if $inputs.db.db_selector == "history"
mkdir 'ref_db'
&&
bowtie2-build --large-index '$inputs.db.bowtie2db' 'ref_db/custom_db'
&&
python '$__tool_directory__/customizemetadata.py'
    transform_json_to_pkl
    --json '$inputs.db.mpa_pkl'
    --pkl 'ref_db/custom_db.pkl'
&&
#end if

metaphlan
#if $inputs.in.selector == "raw"
    '$file_path'
    --input_type '$ext'
    --read_min_len $inputs.in.read_min_len
    --bt2_ps '$inputs.in.mapping.bt2_ps'
    --min_mapq_val $inputs.in.mapping.min_mapq_val
    ## NOTE(review): raw inputs are restricted to fasta/fastq datatypes, so this branch looks unreachable; confirm before removing
    #if $ext == "sam"
        --nreads \$(cat '$file_path' | grep -c -v '^@')
    #end if
#else
    '$inputs.in.in'
    --input_type '$inputs.in.selector'
    #if $inputs.in.selector == "sam"
        ## Number of reads = number of non-header lines of the SAM file
        --nreads \$(cat '$inputs.in.in' | grep -c -v '^@')
    #end if
#end if
#if $inputs.db.db_selector == "cached"
    --bowtie2db '$inputs.db.cached_db.fields.path'
    --index '$inputs.db.cached_db.fields.dbkey'
#else
    --bowtie2db 'ref_db/'
    --index 'custom_db'
#end if
    -t '$analysis.analysis_type.t'
#if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats"
    --tax_lev '$analysis.analysis_type.tax_lev.tax_lev'
#else if $analysis.analysis_type.t == "clade_specific_strain_tracker"
    --clade '$analysis.analysis_type.clade'
    #if str($analysis.analysis_type.min_ab) != ''
    --min_ab $analysis.analysis_type.min_ab
    #end if
#else if $analysis.analysis_type.t == "marker_ab_table" and str($analysis.analysis_type.nreads) != ''
    ## Single "$": a doubled one renders a literal dollar sign that the shell would expand
    --nreads $analysis.analysis_type.nreads
#else if $analysis.analysis_type.t == "marker_pres_table" and str($analysis.analysis_type.pres_th) != ''
    --pres_th $analysis.analysis_type.pres_th
#end if
    --min_cu_len $analysis.min_cu_len
#if str($analysis.min_alignment_len) != ''
    --min_alignment_len $analysis.min_alignment_len
#end if
#if 'add_viruses' in $analysis.organism_profiling
    --add_viruses
#end if
#if 'ignore_eukaryotes' in $analysis.organism_profiling
    --ignore_eukaryotes
#end if
#if 'ignore_bacteria' in $analysis.organism_profiling
    --ignore_bacteria
#end if
#if 'ignore_archaea' in $analysis.organism_profiling
    --ignore_archaea
#end if
    ## Forward the statistical approach selected in the form (it was previously never passed on)
    --stat '$analysis.stat'
    --stat_q $analysis.stat_q
    --perc_nonzero $analysis.perc_nonzero
#if $analysis.ignore_markers
    --ignore_markers '$analysis.ignore_markers'
#end if
    $analysis.avoid_disqm
    --sample_id_key '$out.sample_id_key'
    --sample_id '$out.sample_id'
    $out.use_group_representative
    $out.legacy_output
    $out.CAMI_format_output
    $out.unknown_estimation
    -o '$output_file'
    --bowtie2out 'bowtie2out'
    -s '$sam_output_file'
    --biom '$biom_output_file'
    --nproc \${GALAXY_SLOTS:-4}

#if $inputs.in.selector == "raw"
&&
mv 'bowtie2out' '$bowtie2out'
#end if

## The tax_lev conditional only exists for the two relative-abundance analysis types,
## so it must not be dereferenced for the other ones
#if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats"
    #if $analysis.analysis_type.tax_lev.tax_lev == 'a' and $analysis.analysis_type.tax_lev.split_levels
&&
mkdir 'split_levels'
&&
python '$__tool_directory__/formatoutput.py'
    split_levels
    --metaphlan_output '$output_file'
    --outdir 'split_levels'
    $out.legacy_output

    #end if
#end if

#if $out.krona_output
&&
python '$__tool_directory__/formatoutput.py'
    format_for_krona
    --metaphlan_output '$output_file'
    --krona_output '$krona_output_file'
#end if
    ]]></command>
    <inputs>
        <!-- Reads (or pre-computed mappings) to profile and the marker database to use. -->
        <section name="inputs" title="Inputs" expanded="true">
            <conditional name="in">
                <param name="selector" type="select" label="Input(s)">
                    <option value="raw" selected="true">Fasta/FastQ file(s) with microbiota reads</option>
                    <option value="sam">Externally BowTie2-mapped SAM file</option>
                    <option value="bowtie2out">Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run</option>
                </param>
                <when value="raw">
                    <!-- Layout of the raw reads: one file, several single-end files, or a forward/reverse pair. -->
                    <conditional name="raw_in">
                        <param name="selector" type="select" label="Fasta/FastQ file(s) with microbiota reads">
                            <option value="single" selected="true">One single-end file</option>
                            <option value="multiple">Multiple single-end files</option>
                            <option value="paired">Paired-end files</option>
                        </param>
                        <when value="single">
                            <param name="in"
                                   type="data"
                                   format="@FILE_FORMATS@"
                                   label="Single-end Fasta/FastQ file with microbiota reads"/>
                        </when>
                        <when value="multiple">
                            <param name="in"
                                   type="data"
                                   format="@FILE_FORMATS@"
                                   multiple="true"
                                   label="Single-end Fasta/FastQ files with microbiota reads"/>
                        </when>
                        <when value="paired">
                            <param name="in_f"
                                   type="data"
                                   format="@FILE_FORMATS@"
                                   label="Forward paired-end Fasta/FastQ file with microbiota reads"/>
                            <param name="in_r"
                                   type="data"
                                   format="@FILE_FORMATS@"
                                   label="Reverse paired-end Fasta/FastQ file with microbiota reads"/>
                        </when>
                    </conditional>
                    <param argument="--read_min_len"
                           type="integer"
                           value="70"
                           label="Minimum length of the reads to be considered when parsing the input file"/>
                    <section name="mapping" title="Mapping" expanded="true">
                        <param argument="--bt2_ps"
                               type="select"
                               label="Presets options for BowTie2"
                               help="Applied only with FASTA files">
                            <option value="sensitive">Sensitive</option>
                            <option value="very-sensitive" selected="true">Very sensitive</option>
                            <option value="sensitive-local">Sensitive local</option>
                            <option value="very-sensitive-local">Very sensitive local</option>
                        </param>
                        <param argument="--min_mapq_val"
                               type="integer"
                               value="5"
                               label="Minimum mapping quality value (MAPQ)"/>
                    </section>
                </when>
                <when value="sam">
                    <param name="in"
                           type="data"
                           format="sam"
                           label="Externally BowTie2-mapped SAM file"
                           help="BowTie2 needs to be used first to map microbiota reads"/>
                </when>
                <when value="bowtie2out">
                    <param name="in"
                           type="data"
                           format="tabular"
                           label="Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run"
                           help="File needs to be generated with MetaPhlAn versions >3.0"/>
                </when>
            </conditional>
            <!-- Marker database: taken from the metaphlan_database data table or supplied from the history. -->
            <conditional name="db">
                <param name="db_selector" type="select" label="Database with clade-specific marker genes">
                    <option value="cached" selected="true">Locally cached</option>
                    <option value="history">From history</option>
                </param>
                <when value="cached">
                    <param name="cached_db"
                           type="select"
                           label="Cached database with clade-specific marker genes">
                        <options from_data_table="metaphlan_database">
                            <validator type="no_options" message="No MetaPhlAn database is available"/>
                        </options>
                    </param>
                </when>
                <when value="history">
                    <param argument="--bowtie2db"
                           type="data"
                           format="fasta"
                           label="Database with clade-specific marker genes from history"/>
                    <param argument="--mpa_pkl"
                           type="data"
                           format="json"
                           label="Metadata associate to the database with clade-specific marker genes from history"/>
                </when>
            </conditional>
        </section>
        <!-- Analysis type and the parameters steering the abundance estimation. -->
        <section name="analysis" title="Analysis" expanded="true">
            <conditional name="analysis_type">
                <param argument="-t" type="select" label="Type of analysis to perform">
                    <option value="rel_ab" selected="true">rel_ab: Profiling a microbiota in terms of relative abundances</option>
                    <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a microbiota in terms of relative abundances and estimate the number of reads comming from each clade</option>
                    <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option>
                    <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option>
                    <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option>
                    <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when > 0.0 and normalized by microbiota size if number of reads is specified)</option>
                    <option value="marker_counts">marker_counts: Non-normalized marker counts (use with extreme caution)</option>
                    <option value="marker_pres_table">marker_pres_table: List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th</option>
                </param>
                <!-- Only the two relative-abundance analyses expose the taxonomic-level selector. -->
                <when value="rel_ab">
                    <expand macro="tax_lev"/>
                </when>
                <when value="rel_ab_w_read_stats">
                    <expand macro="tax_lev"/>
                </when>
                <when value="reads_map"/>
                <when value="clade_profiles"/>
                <when value="clade_specific_strain_tracker">
                    <param argument="--clade"
                           type="text"
                           value=""
                           label="Clade for which to extract list of markers present"
                           help="Markers are also extracted for subclades"/>
                    <param argument="--min_ab"
                           type="float"
                           optional="true"
                           label="The minimum percentage abundance for the clade"/>
                </when>
                <when value="marker_ab_table">
                    <param argument="--nreads"
                           type="integer"
                           optional="true"
                           label="Total number of reads in the original microbiota"
                           help="It is used for normalizing the length-normalized counts with the microbiota size as well. No normalization applied if the value is not specified"/>
                </when>
                <when value="marker_counts"/>
                <when value="marker_pres_table">
                    <param argument="--pres_th"
                           type="integer"
                           optional="true"
                           label="Threshold for calling a marker present"/>
                </when>
            </conditional>
            <param argument="--min_cu_len"
                   type="integer"
                   value="2000"
                   label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/>
            <param argument="--min_alignment_len"
                   type="integer"
                   optional="true"
                   label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/>
            <!-- Each selected value is translated into the flag of the same name by the command template. -->
            <param name="organism_profiling"
                   type="select"
                   multiple="true"
                   optional="true"
                   label="Organisms to profile">
                <option value="add_viruses" selected="true">Profile viral organisms (add_viruses)</option>
                <option value="ignore_eukaryotes">Ignore eukaryotic organisms (ignore_eukaryotes)</option>
                <option value="ignore_bacteria">Ignore bacteria organisms (ignore_bacteria)</option>
                <option value="ignore_archaea">Ignore archea organisms (ignore_archaea)</option>
            </param>
            <param argument="--stat"
                   type="select"
                   label="Statistical approach for converting marker abundances into clade abundances">
                <option value="avg_g">avg_g: Clade global (i.e. normalizing all markers together) average (avg_g)</option>
                <option value="avg_l">avg_l: Average of length-normalized marker counts</option>
                <option value="tavg_g" selected="true">tavg_g: Truncated clade global average at --stat_q quantile</option>
                <option value="tavg_l">tavg_l: Truncated average of length-normalized marker counts (at --stat_q)</option>
                <option value="wavg_g">wavg_g: Winsorized clade global average (at --stat_q)</option>
                <option value="wavg_l">wavg_l: Winsorized average of length-normalized marker counts (at --stat_q)</option>
                <option value="med">med: Median of length-normalized marker counts</option>
            </param>
            <param argument="--stat_q"
                   type="float"
                   value="0.2"
                   label="Quantile value for the robust average"/>
            <param argument="--perc_nonzero"
                   type="float"
                   value="0.33"
                   label="Percentage of markers with a non zero relative abundance for misidentify a species"/>
            <param argument="--ignore_markers"
                   type="data"
                   format="txt,tabular"
                   optional="true"
                   label="File containing a list of markers to ignore"
                   help="One marker per line"/>
            <!-- NOTE(review): checked by default, so the disambiguation is deactivated by default even though the help text recommends keeping it; confirm that this default is intended. -->
            <param argument="--avoid_disqm"
                   type="boolean"
                   checked="true"
                   truevalue="--avoid_disqm"
                   falsevalue=""
                   label="Deactivate the procedure of disambiguating the quasi-markers based on the marker abundance pattern found in the sample?"
                   help="It is generally recommended to keep the disambiguation procedure in order to minimize false positives"/>
        </section>
        <!-- Sample labelling, report layout and optional extra outputs. -->
        <section name="out" title="Outputs" expanded="true">
            <param argument="--sample_id_key"
                   type="text"
                   value="SampleID"
                   label="Sample ID key for this analysis"/>
            <param argument="--sample_id"
                   type="text"
                   value="Metaphlan_Analysis"
                   label="Sample ID for this analysis"/>
            <param argument="--use_group_representative"
                   type="boolean"
                   checked="false"
                   truevalue="--use_group_representative"
                   falsevalue=""
                   label="Use a species as representative for species groups?"/>
            <param argument="--legacy-output"
                   type="boolean"
                   checked="false"
                   truevalue="--legacy-output"
                   falsevalue=""
                   label="Old MetaPhlAn2 two columns output?"/>
            <param argument="--CAMI_format_output"
                   type="boolean"
                   checked="false"
                   truevalue="--CAMI_format_output"
                   falsevalue=""
                   label="Report the profiling using the CAMI output format?"/>
            <param argument="--unknown_estimation"
                   type="boolean"
                   checked="false"
                   truevalue="--unknown_estimation"
                   falsevalue=""
                   label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/>
            <!-- Not a MetaPhlAn flag: it triggers the Krona formatting step of the command and the matching output. -->
            <param name="krona_output"
                   type="boolean"
                   checked="false"
                   truevalue="true"
                   falsevalue="false"
                   label="Output for Krona?"/>
        </section>
    </inputs>
    <outputs>
        <!-- Main profile written by MetaPhlAn; always created. -->
        <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances" />
        <!-- The Bowtie2 mapping and SAM outputs are only created when raw reads are given as input. -->
        <data name="bowtie2out" format="tabular" label="${tool.name} on ${on_string}: Bowtie2 output">
            <filter>inputs['in']['selector'] == "raw"</filter>
        </data>
        <data name="sam_output_file" format="sam" label="${tool.name} on ${on_string}: SAM file">
            <filter>inputs['in']['selector'] == "raw"</filter>
        </data>
        <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file" />
        <!-- One dataset per file found in split_levels/.
             The tax_lev conditional only exists for the two relative-abundance analysis types, so the
             analysis type is tested first: indexing it for any other type would raise instead of
             filtering the collection out. -->
        <collection name="levels" type="list" label="${tool.name} on ${on_string}: Predicted taxon relative abundances at each taxonomic levels" >
            <discover_datasets pattern="(?P&lt;designation&gt;.+)" directory="split_levels/" format="tabular"/>
            <filter>analysis['analysis_type']['t'] in ('rel_ab', 'rel_ab_w_read_stats') and analysis['analysis_type']['tax_lev']['tax_lev'] == "a" and analysis['analysis_type']['tax_lev']['split_levels']</filter>
        </collection>
        <!-- Only created when the Krona option of the Outputs section is enabled. -->
        <data name="krona_output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances for Krona">
            <filter>out['krona_output']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test on no_taxon_input.fasta with the cached test database, the per-level split and the Krona output enabled.
             The assertions expect an UNCLASSIFIED profile, no taxon in the BIOM and Krona outputs and
             single-line per-level reports (presumably header only; verify against the test data). -->
        <test expect_num_outputs="6">
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <!-- Single uncompressed FASTA file -->
                        <param name="selector" value="single"/>
                        <param name="in" value="no_taxon_input.fasta"/>
                    </conditional>
                    <param name="read_min_len" value="70"/>
                    <section name="mapping">
                        <param name="bt2_ps" value="sensitive"/>
                        <param name="min_mapq_val" value="5"/>
                    </section>
                </conditional>
                <conditional name="db">
                    <!-- Cached db -->
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="rel_ab"/>
                    <conditional name="tax_lev">
                        <param name="tax_lev" value="a"/>
                        <param name="split_levels" value="true"/>
                    </conditional>
                </conditional>
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unknown_estimation" value="false"/>
                <param name="krona_output" value="true"/>
            </section>
            <output name="output_file" ftype="tabular">
                <assert_contents>
                    <has_text text="UNCLASSIFIED"/>
                </assert_contents>
            </output>
            <output name="bowtie2out" ftype="tabular">
                <assert_contents>
                    <not_has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
            <output name="sam_output_file" ftype="sam">
                <assert_contents>
                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1">
                <assert_contents>
                    <not_has_text text="k__Bacteria"/>
                    <not_has_text text="p__Actinobacteria"/>
                </assert_contents>
            </output>
            <!-- Every per-level report must contain exactly one line and must not carry the id column of the level above. -->
            <output_collection name="levels" type="list" >
                <element name="all" ftype="tabular">
                    <assert_contents>
                        <has_text text="class"/>
                        <has_n_columns n="17"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="kingdom" ftype="tabular">
                    <assert_contents>
                        <has_text text="kingdom_id"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="phylum" ftype="tabular">
                    <assert_contents>
                        <has_text text="phylum_id"/>
                        <not_has_text text="kingdom_id"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="class" ftype="tabular">
                    <assert_contents>
                        <has_text text="class_id"/>
                        <not_has_text text="phylum_id"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="order" ftype="tabular">
                    <assert_contents>
                        <has_text text="order_id"/>
                        <not_has_text text="class_id"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="family" ftype="tabular">
                    <assert_contents>
                        <has_text text="family_id"/>
                        <not_has_text text="order"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="genus" ftype="tabular">
                    <assert_contents>
                        <has_text text="genus_id"/>
                        <not_has_text text="family"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="species" ftype="tabular">
                    <assert_contents>
                        <has_text text="species_id"/>
                        <not_has_text text="genus"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
                <element name="strains" ftype="tabular">
                    <assert_contents>
                        <has_text text="strains_id"/>
                        <not_has_text text="species_id"/>
                        <has_n_columns n="3"/>
                        <has_n_lines n="1"/>
                    </assert_contents>
                </element>
            </output_collection>
            <!-- The Krona table is expected to be (nearly) empty: at most two lines and two bytes. -->
            <output name="krona_output_file" ftype="tabular">
                <assert_contents>
                    <not_has_text text="k__Bacteria"/>
                    <has_n_lines n="1" delta="1"/>
                    <has_size value="1" delta="1"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="6">
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <!-- Single GZ file -->
                        <param name="selector" value="single"/>
                        <param name="in" value="SRS014464-Anterior_nares.fasta.gz"/>
                    </conditional>
                    <param name="read_min_len" value="70"/>
                    <section name="mapping">
                        <param name="bt2_ps" value="sensitive"/>
                        <param name="min_mapq_val" value="5"/>
                    </section>
                </conditional>
                <conditional name="db">
                    <!-- Cached db -->
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="rel_ab"/>
                    <conditional name="tax_lev">
                        <param name="tax_lev" value="a"/>
                        <param name="split_levels" value="true"/>
                    </conditional>
                </conditional>
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unknown_estimation" value="false"/>
                <param name="krona_output" value="true"/>
            </section>
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                </assert_contents>
            </output>
            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                    <has_text text="37637__U2I1U8__N579_01580"/>
                </assert_contents>
            </output>
            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size">
                <assert_contents>
                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
            <output_collection name="levels" type="list" >
                <element name="all" ftype="tabular">
                    <assert_contents>
                        <has_text text="Gammaproteobacteria"/>
                        <has_text text="Corynebacterium accolens"/>
                        <has_n_columns n="17"/>
                    </assert_contents>
                </element>
                <element name="kingdom" ftype="tabular">
                    <assert_contents>
                        <has_text text="kingdom_id"/>
                        <has_text text="Bacteria"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
                <element name="phylum" ftype="tabular">
                    <assert_contents>
                        <has_text text="phylum_id"/>
                        <not_has_text text="kingdom_id"/>
                        <has_text text="Firmicutes"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
                <element name="class" ftype="tabular">
                    <assert_contents>
                        <has_text text="class_id"/>
                        <not_has_text text="phylum_id"/>
                        <has_text text="Actinobacteria"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
                <element name="order" ftype="tabular">
                    <assert_contents>
                        <has_text text="order_id"/>
                        <not_has_text text="class_id"/>
                        <has_text text="Propionibacteriales"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
                <element name="family" ftype="tabular">
                    <assert_contents>
                        <has_text text="family_id"/>
                        <not_has_text text="order"/>
                        <has_text text="Propionibacteriaceae"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
                <element name="genus" ftype="tabular">
                    <assert_contents>
                        <has_text text="genus_id"/>
                        <not_has_text text="family"/>
                        <has_text text="Cutibacterium"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
                <element name="species" ftype="tabular">
                    <assert_contents>
                        <has_text text="species_id"/>
                        <not_has_text text="genus"/>
                        <has_text text="Corynebacterium accolens"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
                <element name="strains" ftype="tabular">
                    <assert_contents>
                        <has_text text="strains_id"/>
                        <not_has_text text="species_id"/>
                        <has_n_columns n="3"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="krona_output_file" ftype="tabular">
                <assert_contents>
                    <not_has_text text="k__Bacteria"/>
                    <has_text text="Corynebacterium accolens"/>
                    <has_n_columns n="9"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="4">
            <!-- Raw reads through the "multiple" selector (the same gzipped FASTA listed twice),
                 with the database files taken from the history (db_selector="history").
                 Four outputs are checked: abundances, bowtie2out, SAM and BIOM. -->
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <!-- Two comma-separated gzipped FASTA files -->
                        <param name="selector" value="multiple"/>
                        <param name="in" value="SRS014464-Anterior_nares.fasta.gz,SRS014464-Anterior_nares.fasta.gz"/>
                    </conditional>
                    <param name="read_min_len" value="70"/>
                    <section name="mapping">
                        <param name="bt2_ps" value="sensitive"/>
                        <param name="min_mapq_val" value="5"/>
                    </section>
                </conditional>
                <conditional name="db">
                    <!-- Database provided as history datasets -->
                    <param name="db_selector" value="history"/>
                    <param name="bowtie2db" value="test-db.fasta"/>
                    <param name="mpa_pkl" value="test-db.json"/>
                </conditional>
            </section>
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="rel_ab"/>
                    <conditional name="tax_lev">
                        <!-- All taxonomic levels, no per-level reports -->
                        <param name="tax_lev" value="a"/>
                        <param name="split_levels" value="false"/>
                    </conditional>
                </conditional>
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unknown_estimation" value="false"/>
                <param name="krona_output" value="false"/>
            </section>
            <!-- Abundance table: header columns plus one expected lineage -->
            <output name="output_file"
                ftype="tabular"
                file="SRS014464-Anterior_nares-abundances.tabular"
                compare="sim_size">
                <assert_contents>
                    <has_text text="clade_name"/>
                    <has_text text="NCBI_tax_id"/>
                    <has_text text="relative_abundance"/>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                </assert_contents>
            </output>
            <!-- Intermediate mapping outputs, compared against the two-input reference files -->
            <output name="bowtie2out"
                ftype="tabular"
                file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular"
                compare="sim_size">
                <assert_contents>
                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                    <has_text text="37637__U2I1U8__N579_01580"/>
                </assert_contents>
            </output>
            <output name="sam_output_file"
                ftype="sam"
                file="SRS014464-Anterior_nares-two-inputs.sam"
                compare="sim_size">
                <assert_contents>
                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                </assert_contents>
            </output>
            <output name="biom_output_file"
                ftype="biom1"
                file="SRS014464-Anterior_nares.biom"
                compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="4">
            <!-- Raw reads through the "paired" selector (the same gzipped FASTA used as forward
                 and reverse), profiled against the cached database entry.
                 Four outputs are checked: abundances, bowtie2out, SAM and BIOM. -->
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <!-- Forward and reverse gzipped FASTA files -->
                        <param name="selector" value="paired"/>
                        <param name="in_f" value="SRS014464-Anterior_nares.fasta.gz"/>
                        <param name="in_r" value="SRS014464-Anterior_nares.fasta.gz"/>
                    </conditional>
                    <param name="read_min_len" value="70"/>
                    <section name="mapping">
                        <param name="bt2_ps" value="sensitive"/>
                        <param name="min_mapq_val" value="5"/>
                    </section>
                </conditional>
                <conditional name="db">
                    <!-- Database selected from the cached entries -->
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="rel_ab"/>
                    <conditional name="tax_lev">
                        <!-- All taxonomic levels, no per-level reports -->
                        <param name="tax_lev" value="a"/>
                        <param name="split_levels" value="false"/>
                    </conditional>
                </conditional>
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unknown_estimation" value="false"/>
                <param name="krona_output" value="false"/>
            </section>
            <!-- Abundance table: header columns plus one expected lineage -->
            <output name="output_file"
                ftype="tabular"
                file="SRS014464-Anterior_nares-abundances.tabular"
                compare="sim_size">
                <assert_contents>
                    <has_text text="clade_name"/>
                    <has_text text="NCBI_tax_id"/>
                    <has_text text="relative_abundance"/>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                </assert_contents>
            </output>
            <!-- Intermediate mapping outputs, compared against the two-input reference files -->
            <output name="bowtie2out"
                ftype="tabular"
                file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular"
                compare="sim_size">
                <assert_contents>
                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                    <has_text text="37637__U2I1U8__N579_01580"/>
                </assert_contents>
            </output>
            <output name="sam_output_file"
                ftype="sam"
                file="SRS014464-Anterior_nares-two-inputs.sam"
                compare="sim_size">
                <assert_contents>
                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                </assert_contents>
            </output>
            <output name="biom_output_file"
                ftype="biom1"
                file="SRS014464-Anterior_nares.biom"
                compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- Input is a SAM file (selector="sam"), so only two outputs are checked:
                 the abundance table and the BIOM file. -->
            <section name="inputs">
                <conditional name="in">
                    <!-- SAM alignment file as input -->
                    <param name="selector" value="sam"/>
                    <param name="in" value="SRS014464-Anterior_nares.sam"/>
                </conditional>
                <conditional name="db">
                    <!-- Database selected from the cached entries -->
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="rel_ab"/>
                    <conditional name="tax_lev">
                        <!-- All taxonomic levels, no per-level reports -->
                        <param name="tax_lev" value="a"/>
                        <param name="split_levels" value="false"/>
                    </conditional>
                </conditional>
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unknown_estimation" value="false"/>
                <param name="krona_output" value="false"/>
            </section>
            <!-- Abundance table: header columns plus one expected lineage -->
            <output name="output_file"
                ftype="tabular"
                file="SRS014464-Anterior_nares-abundances.tabular"
                compare="sim_size">
                <assert_contents>
                    <has_text text="clade_name"/>
                    <has_text text="NCBI_tax_id"/>
                    <has_text text="relative_abundance"/>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                </assert_contents>
            </output>
            <output name="biom_output_file"
                ftype="biom1"
                file="SRS014464-Anterior_nares.biom"
                compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- Input is a bowtie2out intermediate mapping file (selector="bowtie2out"), so only
                 two outputs are checked: the abundance table and the BIOM file.
                 The input is declared once, inside the "inputs" section. No "mapping" section is
                 given here: the other tests only set it inside the "in" conditional for raw reads. -->
            <section name="inputs">
                <conditional name="in">
                    <!-- bowtie2out -->
                    <param name="selector" value="bowtie2out"/>
                    <param name="in" value="SRS014464-Anterior_nares-bowtie2out.tabular"/>
                </conditional>
                <conditional name="db">
                    <!-- Cached db -->
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="rel_ab"/>
                    <conditional name="tax_lev">
                        <!-- All taxonomic levels, no per-level reports -->
                        <param name="tax_lev" value="a"/>
                        <param name="split_levels" value="false"/>
                    </conditional>
                </conditional>
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unknown_estimation" value="false"/>
                <param name="krona_output" value="false"/>
            </section>
            <!-- Abundance table: one expected lineage plus the header columns -->
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                    <has_text text="relative_abundance"/>
                    <has_text text="NCBI_tax_id"/>
                    <has_text text="clade_name"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="6">
            <!-- Single uncompressed FASTA against the cached database, with legacy output,
                 per-level reports (split_levels), Krona output and an ignore_markers file enabled.
                 Six outputs are checked: abundances, bowtie2out, SAM, BIOM, the "levels"
                 collection and the Krona table. -->
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <!-- One uncompressed FASTA file -->
                        <param name="selector" value="single"/>
                        <param name="in" value="SRS014464-Anterior_nares.fasta"/>
                    </conditional>
                    <param name="read_min_len" value="70"/>
                    <section name="mapping">
                        <param name="bt2_ps" value="sensitive"/>
                        <param name="min_mapq_val" value="5"/>
                    </section>
                </conditional>
                <conditional name="db">
                    <!-- Database selected from the cached entries -->
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <section name="analysis">
                <conditional name="analysis_type">
                    <param name="t" value="rel_ab"/>
                    <conditional name="tax_lev">
                        <!-- All taxonomic levels, plus one report per level -->
                        <param name="tax_lev" value="a"/>
                        <param name="split_levels" value="true"/>
                    </conditional>
                </conditional>
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="ignore_markers" value="marker.txt"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="true"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unknown_estimation" value="false"/>
                <param name="krona_output" value="true"/>
            </section>
            <!-- Legacy abundance table: sample id key/value in the header plus one expected lineage -->
            <output name="output_file"
                ftype="tabular"
                file="SRS014464-Anterior_nares-legacy-abundances.tabular"
                compare="sim_size">
                <assert_contents>
                    <has_text text="SampleID"/>
                    <has_text text="Metaphlan_Analysis"/>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                </assert_contents>
            </output>
            <!-- Intermediate mapping outputs -->
            <output name="bowtie2out"
                ftype="tabular"
                file="SRS014464-Anterior_nares-bowtie2out.tabular"
                compare="sim_size">
                <assert_contents>
                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                    <has_text text="37637__U2I1U8__N579_01580"/>
                </assert_contents>
            </output>
            <output name="sam_output_file"
                ftype="sam"
                file="SRS014464-Anterior_nares.sam"
                compare="sim_size">
                <assert_contents>
                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                </assert_contents>
            </output>
            <output name="biom_output_file"
                ftype="biom1"
                file="SRS014464-Anterior_nares.biom"
                compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
            <!-- Per-level reports: "all" is asserted to have 9 columns, every single-level table 2 -->
            <output_collection name="levels" type="list">
                <element name="all" ftype="tabular">
                    <assert_contents>
                        <has_n_columns n="9"/>
                        <has_text text="Gammaproteobacteria"/>
                        <has_text text="Corynebacterium accolens"/>
                    </assert_contents>
                </element>
                <element name="kingdom" ftype="tabular">
                    <assert_contents>
                        <has_n_columns n="2"/>
                        <has_text text="kingdom"/>
                        <has_text text="Bacteria"/>
                    </assert_contents>
                </element>
                <element name="phylum" ftype="tabular">
                    <assert_contents>
                        <has_n_columns n="2"/>
                        <has_text text="phylum"/>
                        <has_text text="Firmicutes"/>
                    </assert_contents>
                </element>
                <element name="class" ftype="tabular">
                    <assert_contents>
                        <has_n_columns n="2"/>
                        <has_text text="class"/>
                        <has_text text="Actinobacteria"/>
                    </assert_contents>
                </element>
                <element name="order" ftype="tabular">
                    <assert_contents>
                        <has_n_columns n="2"/>
                        <has_text text="order"/>
                        <has_text text="Propionibacteriales"/>
                    </assert_contents>
                </element>
                <element name="family" ftype="tabular">
                    <assert_contents>
                        <has_n_columns n="2"/>
                        <has_text text="family"/>
                        <has_text text="Propionibacteriaceae"/>
                    </assert_contents>
                </element>
                <element name="genus" ftype="tabular">
                    <assert_contents>
                        <has_n_columns n="2"/>
                        <has_text text="genus"/>
                        <has_text text="Cutibacterium"/>
                    </assert_contents>
                </element>
                <element name="species" ftype="tabular">
                    <assert_contents>
                        <has_n_columns n="2"/>
                        <has_text text="species"/>
                        <has_text text="Corynebacterium accolens"/>
                    </assert_contents>
                </element>
                <element name="strains" ftype="tabular">
                    <assert_contents>
                        <has_n_columns n="2"/>
                        <has_text text="strains"/>
                    </assert_contents>
                </element>
            </output_collection>
            <!-- Krona table: no rank-prefixed names, 9 columns -->
            <output name="krona_output_file" ftype="tabular">
                <assert_contents>
                    <has_n_columns n="9"/>
                    <not_has_text text="k__Bacteria"/>
                    <has_text text="Corynebacterium accolens"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
What it does
============

MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria,
Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) at species-level.

MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes
(~99,500 bacterial and archaeal and ~500 eukaryotic), allowing:

- unambiguous taxonomic assignments;
- accurate estimation of organismal relative abundance;
- species-level resolution for bacteria, archaea, eukaryotes and viruses;
- strain identification and tracking;
- orders of magnitude speedups compared to existing methods;
- microbiota strain-level population genomics.

MetaPhlAn clade-abundance estimation
------------------------------------

The basic usage of MetaPhlAn consists of identifying the clades (from phyla to species, and
strains in particular cases) present in the microbiota obtained from a microbiome sample and
estimating their relative abundance.

Marker level analysis
---------------------

MetaPhlAn introduces the capability of characterizing organisms at the strain level using
non-aggregated marker information. This capability comes in several slightly different flavours,
which provide a way to perform strain tracking and comparison across multiple samples.

Usually, MetaPhlAn is first run with default parameters for the type of analysis to profile the
species present in the community, and then a strain-level profiling can be performed to zoom-in on
specific species of interest. This operation can be performed quickly as it exploits the bowtie2out
intermediate file saved during the execution of the default analysis type.

Inputs
======

MetaPhlAn takes as input either:

- one or several sequence files in FASTA or FASTQ format (compressed or not)
- a SAM file produced by Bowtie2
- an intermediary mapping file of the microbiota generated by a previous MetaPhlAn run

It also needs the reference database, which can be locally installed or customized using the dedicated tools.

Outputs
=======

The main output is a tab-separated file with the predicted taxon relative abundances.

It also generates a BIOM file and, if sequence files are given as inputs, some intermediate files (SAM and bowtie2out).


More help and use cases
=======================

To get more information about MetaPhlAn usage and use cases, please refer to the `MetaPhlAn documentation`_.

.. _MetaPhlAn documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-4#Basic-Usage

    ]]></help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Sat, 03 Dec 2022 10:43:21 +0000
parents	ff8f55893e7d
children	2131d7dca455