Mercurial > repos > iuc > metaphlan
diff metaphlan.xml @ 0:f5df500fcc3c draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
author | iuc |
---|---|
date | Mon, 19 Apr 2021 20:56:20 +0000 |
parents | |
children | b89b0765695d |
line wrap: on
line diff
<!--
    Galaxy wrapper for MetaPhlAn.

    Accepts raw reads (single, multiple or paired; optionally gz/bz2 compressed),
    an externally produced Bowtie2 SAM file, or a bowtie2out file from a previous
    MetaPhlAn run, together with either a cached marker database or one supplied
    from the history. Tokens (@TOOL_VERSION@, @FILE_FORMATS@, ...) and the
    expanded macros come from macros.xml.
-->
<tool id="metaphlan" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>to profile the composition of microbial communities</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="edam_ontology"/>
    <expand macro="requirements"/>
    <version_command>metaphlan -v</version_command>
    <command detect_errors="aggressive"><![CDATA[
## ---------------------------------------------------------------------------
## Step 1 (raw reads only): decompress gz/bz2 inputs into the working directory
## and build the comma-separated list of read files handed to metaphlan.
## ---------------------------------------------------------------------------
#if $inputs.in.selector == "raw"
    #if $inputs.in.raw_in.selector == "single"
        #set full_ext=$inputs.in.raw_in.in.datatype.file_ext
        #if $full_ext.endswith("gz")
            #set $file_path="in"
zcat '$inputs.in.raw_in.in' > '$file_path'
&&
        #else if $full_ext.endswith("bz2")
            #set $file_path="in"
bzcat '$inputs.in.raw_in.in' > '$file_path'
&&
        #else
            #set $file_path=$inputs.in.raw_in.in
        #end if
    #else if $inputs.in.raw_in.selector == "multiple"
        ## The datatype of the first file is the reference; every other file must match it.
        #set full_ext=$inputs.in.raw_in.in[0].datatype.file_ext
        #set file_path=""
        #set sep=""
        #for $i, $f in enumerate($inputs.in.raw_in.in)
            #if $f.datatype.file_ext != $full_ext
echo "Different datatypes for input files"
&&
exit 1
&&
            #end if
            #if $full_ext.endswith("gz")
                #set fp="input_%s" % ($i)
zcat '$f' > '$fp'
&&
            #else if $full_ext.endswith("bz2")
                #set fp="input_%s" % ($i)
bzcat '$f' > '$fp'
&&
            #else
                #set fp=$f
            #end if
            #set $file_path+="%s%s" % ($sep, $fp)
            #set $sep=","
        #end for
    #else if $inputs.in.raw_in.selector == "paired"
        #set full_ext=$inputs.in.raw_in.in_f.datatype.file_ext
        #if $full_ext != $inputs.in.raw_in.in_r.datatype.file_ext
echo "Different datatypes for input paired-end files"
&&
exit 1
&&
        #end if
        #if $full_ext.endswith("gz")
zcat '$inputs.in.raw_in.in_f' > 'in_f'
&&
zcat '$inputs.in.raw_in.in_r' > 'in_r'
&&
            #set file_path="in_f,in_r"
        #else if $full_ext.endswith("bz2")
bzcat '$inputs.in.raw_in.in_f' > 'in_f'
&&
bzcat '$inputs.in.raw_in.in_r' > 'in_r'
&&
            #set file_path="in_f,in_r"
        #else
            #set file_path="%s,%s" % ($inputs.in.raw_in.in_f,$inputs.in.raw_in.in_r)
        #end if
    #end if

    ## Reduce the Galaxy datatype extension to the value given to the input_type option.
    #if $full_ext.startswith("fastq")
        #set ext='fastq'
    #else if $full_ext.startswith("fasta") and $full_ext.endswith(("gz","bz2"))
        #set ext='fasta'
    #else
        #set ext=$full_ext
    #end if
#end if

## ---------------------------------------------------------------------------
## Step 2 (history database only): index the marker FASTA with bowtie2-build and
## convert the JSON metadata to the pickle file placed next to the index.
## ---------------------------------------------------------------------------
#if $inputs.db.db_selector == "history"
mkdir 'ref_db'
&&
bowtie2-build '$inputs.db.bowtie2db' 'ref_db/custom_db'
&&
python '$__tool_directory__/customizemetadata.py'
    transform_json_to_pkl
    --json '$inputs.db.mpa_pkl'
    --pkl 'ref_db/custom_db.pkl'
&&
#end if

## ---------------------------------------------------------------------------
## Step 3: run MetaPhlAn.
## ---------------------------------------------------------------------------
metaphlan
#if $inputs.in.selector == "raw"
    '$file_path'
    --input_type '$ext'
    --read_min_len $inputs.in.read_min_len
    --bt2_ps '$inputs.in.mapping.bt2_ps'
    --min_mapq_val $inputs.in.mapping.min_mapq_val
#else
    '$inputs.in.in'
    --input_type '$inputs.in.selector'
#end if
#if $inputs.db.db_selector == "cached"
    --bowtie2db '$inputs.db.cached_db.fields.path'
    --index '$inputs.db.cached_db.fields.dbkey'
#else
    --bowtie2db 'ref_db/'
    --index 'custom_db'
#end if
    -t '$analysis.analysis_type.t'
#if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats"
    --tax_lev '$analysis.analysis_type.tax_lev'
#else if $analysis.analysis_type.t == "clade_specific_strain_tracker"
    --clade '$analysis.analysis_type.clade'
    #if str($analysis.analysis_type.min_ab) != ''
    --min_ab $analysis.analysis_type.min_ab
    #end if
#else if $analysis.analysis_type.t == "marker_ab_table" and str($analysis.analysis_type.nreads) != ''
    --nreads $analysis.analysis_type.nreads
#else if $analysis.analysis_type.t == "marker_pres_table" and str($analysis.analysis_type.pres_th) != ''
    --pres_th $analysis.analysis_type.pres_th
#end if
    --min_cu_len $analysis.min_cu_len
#if str($analysis.min_alignment_len) != ''
    --min_alignment_len $analysis.min_alignment_len
#end if
#if 'add_viruses' in $analysis.organism_profiling
    --add_viruses
#end if
#if 'ignore_eukaryotes' in $analysis.organism_profiling
    --ignore_eukaryotes
#end if
#if 'ignore_bacteria' in $analysis.organism_profiling
    --ignore_bacteria
#end if
#if 'ignore_archaea' in $analysis.organism_profiling
    --ignore_archaea
#end if
    --stat '$analysis.stat'
    --stat_q $analysis.stat_q
    --perc_nonzero $analysis.perc_nonzero
#if $analysis.ignore_markers
    --ignore_markers '$analysis.ignore_markers'
#end if
    $analysis.avoid_disqm
    --sample_id_key '$out.sample_id_key'
    --sample_id '$out.sample_id'
    $out.use_group_representative
    $out.legacy_output
    $out.CAMI_format_output
    $out.unknown_estimation
    -o '$output_file'
    --bowtie2out 'bowtie2out'
    -s '$sam_output_file'
    --biom '$biom_output_file'
    --nproc \${GALAXY_SLOTS:-4}

## The bowtie2out dataset is only declared as an output for raw-read input,
## so the intermediate file is moved into place in that case only.
#if $inputs.in.selector == "raw"
&&
mv 'bowtie2out' '$bowtie2out'
#end if
    ]]></command>
    <inputs>
        <section name="inputs" title="Inputs" expanded="true">
            <conditional name="in">
                <param name="selector" type="select" label="Input(s)">
                    <option value="raw" selected="true">Fasta/FastQ file(s) with metagenomic reads</option>
                    <option value="sam">Externally BowTie2-mapped SAM file</option>
                    <option value="bowtie2out">Intermediary mapping file of the metagenome generated by a previous MetaPhlAn run</option>
                </param>
                <when value="raw">
                    <conditional name="raw_in">
                        <param name="selector" type="select" label="Fasta/FastQ file(s) with metagenomic reads">
                            <option value="single" selected="true">One single-end file</option>
                            <option value="multiple">Multiple single-end files</option>
                            <option value="paired">Paired-end files</option>
                        </param>
                        <when value="single">
                            <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ file with metagenomic reads"/>
                        </when>
                        <when value="multiple">
                            <param name="in" type="data" format="@FILE_FORMATS@" multiple="true" label="Single-end Fasta/FastQ files with metagenomic reads"/>
                        </when>
                        <when value="paired">
                            <param name="in_f" type="data" format="@FILE_FORMATS@" label="Forward paired-end Fasta/FastQ file with metagenomic reads"/>
                            <param name="in_r" type="data" format="@FILE_FORMATS@" label="Reverse paired-end Fasta/FastQ file with metagenomic reads"/>
                        </when>
                    </conditional>
                    <param argument="--read_min_len" type="integer" value="70" label="Minimum length of the reads to be considered when parsing the input file"/>
                    <section name="mapping" title="Mapping" expanded="true">
                        <!-- Option values are passed verbatim to metaphlan and must match its accepted Bowtie2 preset names. -->
                        <param argument="--bt2_ps" type="select" label="Presets options for BowTie2" help="Applied only with FASTA files">
                            <option value="sensitive">Sensitive</option>
                            <option value="very-sensitive" selected="true">Very sensitive</option>
                            <option value="sensitive-local">Sensitive local</option>
                            <option value="very-sensitive-local">Very sensitive local</option>
                        </param>
                        <param argument="--min_mapq_val" type="integer" value="5" label="Minimum mapping quality value (MAPQ)"/>
                    </section>
                </when>
                <when value="sam">
                    <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map metagenome reads"/>
                </when>
                <when value="bowtie2out">
                    <param name="in" type="data" format="tabular" label="Intermediary mapping file of the metagenome generated by a previous MetaPhlAn run"
                        help="File needs to be generated with MetaPhlAn versions >3.0"/>
                </when>
            </conditional>
            <conditional name="db">
                <param name="db_selector" type="select" label="Database with clade-specific marker genes">
                    <option value="cached" selected="true">Locally cached</option>
                    <option value="history">From history</option>
                </param>
                <when value="cached">
                    <param name="cached_db" label="Cached database with clade-specific marker genes" type="select">
                        <options from_data_table="metaphlan_database">
                            <validator message="No MetaPhlAn database is available" type="no_options" />
                        </options>
                    </param>
                </when>
                <when value="history">
                    <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/>
                    <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated with the database with clade-specific marker genes from history"/>
                </when>
            </conditional>
        </section>
        <section name="analysis" title="Analysis" expanded="true">
            <conditional name="analysis_type">
                <param argument="-t" type="select" label="Type of analysis to perform">
                    <option value="rel_ab" selected="true">rel_ab: Profiling a metagenome in terms of relative abundances</option>
                    <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a metagenome in terms of relative abundances and estimate the number of reads coming from each clade</option>
                    <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option>
                    <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option>
                    <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option>
                    <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when > 0.0 and normalized by metagenome size if number of reads is specified)</option>
                    <option value="marker_counts">marker_counts: Non-normalized marker counts (use with extreme caution)</option>
                    <option value="marker_pres_table">marker_pres_table: List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th)</option>
                </param>
                <when value="rel_ab">
                    <expand macro="tax_lev"/>
                </when>
                <when value="rel_ab_w_read_stats">
                    <expand macro="tax_lev"/>
                </when>
                <when value="reads_map"/>
                <when value="clade_profiles"/>
                <when value="clade_specific_strain_tracker">
                    <param argument="--clade" type="text" value="" label="Clade for which to extract list of markers present"
                        help="Markers are also extracted for subclades" />
                    <param argument="--min_ab" type="float" optional="true" label="The minimum percentage abundance for the clade"/>
                </when>
                <when value="marker_ab_table">
                    <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original metagenome"
                        help="It is used for normalizing the length-normalized counts with the metagenome size as well. No normalization applied if the value is not specified"/>
                </when>
                <when value="marker_counts"/>
                <when value="marker_pres_table">
                    <param argument="--pres_th" type="integer" optional="true" label="Threshold for calling a marker present"/>
                </when>
            </conditional>
            <param argument="--min_cu_len" type="integer" value="2000"
                label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/>
            <param argument="--min_alignment_len" type="integer" optional="true"
                label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/>
            <!-- Each selected value is turned into the flag of the same name in the command template. -->
            <param name="organism_profiling" type="select" multiple="true" optional="true" label="Organisms to profile">
                <option value="add_viruses" selected="true">Profile viral organisms (add_viruses)</option>
                <option value="ignore_eukaryotes">Ignore eukaryotic organisms (ignore_eukaryotes)</option>
                <option value="ignore_bacteria">Ignore bacteria organisms (ignore_bacteria)</option>
                <option value="ignore_archaea">Ignore archaea organisms (ignore_archaea)</option>
            </param>
            <param argument="--stat" type="select" label="Statistical approach for converting marker abundances into clade abundances">
                <option value="avg_g">avg_g: Clade global (i.e. normalizing all markers together) average (avg_g)</option>
                <option value="avg_l">avg_l: Average of length-normalized marker counts</option>
                <option value="tavg_g" selected="true">tavg_g: Truncated clade global average at --stat_q quantile</option>
                <option value="tavg_l">tavg_l: Truncated average of length-normalized marker counts (at --stat_q)</option>
                <option value="wavg_g">wavg_g: Winsorized clade global average (at --stat_q)</option>
                <option value="wavg_l">wavg_l: Winsorized average of length-normalized marker counts (at --stat_q)</option>
                <option value="med">med: Median of length-normalized marker counts</option>
            </param>
            <param argument="--stat_q" type="float" value="0.2" label="Quantile value for the robust average"/>
            <param argument="--perc_nonzero" type="float" value="0.33" label="Percentage of markers with a non zero relative abundance for misidentify a species"/>
            <param argument="--ignore_markers" type="data" format="txt,tabular" optional="true" label="File containing a list of markers to ignore" help="One marker per line"/>
            <param argument="--avoid_disqm" type='boolean' checked="true" truevalue='--avoid_disqm' falsevalue=''
                label="Deactivate the procedure of disambiguating the quasi-markers based on the marker abundance pattern found in the sample?"
                help="It is generally recommended to keep the disambiguation procedure in order to minimize false positives"/>
        </section>
        <section name="out" title="Outputs" expanded="true">
            <param argument="--sample_id_key" type="text" value="SampleID" label="Sample ID key for this analysis"/>
            <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/>
            <param argument="--use_group_representative" type='boolean' checked="false" truevalue='--use_group_representative' falsevalue=''
                label="Use a species as representative for species groups?"/>
            <param name="legacy_output" argument="--legacy-output" type='boolean' checked="false" truevalue='--legacy-output' falsevalue=''
                label="Old MetaPhlAn2 two columns output?"/>
            <param argument="--CAMI_format_output" type='boolean' checked="false" truevalue='--CAMI_format_output' falsevalue=''
                label="Report the profiling using the CAMI output format?"/>
            <param argument="--unknown_estimation" type='boolean' checked="false" truevalue='--unknown_estimation' falsevalue=''
                label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/>
        </section>
    </inputs>
    <outputs>
        <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances" />
        <!-- The two mapping outputs are only produced when reads are mapped by this run. -->
        <data name="bowtie2out" format="tabular" label="${tool.name} on ${on_string}: Bowtie2 output">
            <filter>inputs['in']['selector'] == "raw"</filter>
        </data>
        <data name="sam_output_file" format="sam" label="${tool.name} on ${on_string}: SAM file">
            <filter>inputs['in']['selector'] == "raw"</filter>
        </data>
        <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file" />
    </outputs>
    <tests>
        <test expect_num_outputs="4">
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <!-- Single GZ file -->
                        <param name="selector" value="single"/>
                        <param name="in" value="SRS014464-Anterior_nares.fasta.gz"/>
                    </conditional>
                    <param name="read_min_len" value="70"/>
                    <section name="mapping">
                        <param name="bt2_ps" value="sensitive"/>
                        <param name="min_mapq_val" value="5"/>
                    </section>
                </conditional>
                <conditional name="db">
                    <!-- Cached db -->
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <section name="analysis">
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unknown_estimation" value="false"/>
            </section>
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                </assert_contents>
            </output>
            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                    <has_text text="37637__U2I1U8__N579_01580"/>
                </assert_contents>
            </output>
            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size">
                <assert_contents>
                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="4">
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <!-- Multiple GZ file -->
                        <param name="selector" value="multiple"/>
                        <param name="in" value="SRS014464-Anterior_nares.fasta.gz,SRS014464-Anterior_nares.fasta.gz"/>
                    </conditional>
                    <param name="read_min_len" value="70"/>
                    <section name="mapping">
                        <param name="bt2_ps" value="sensitive"/>
                        <param name="min_mapq_val" value="5"/>
                    </section>
                </conditional>
                <conditional name="db">
                    <!-- Local db -->
                    <param name="db_selector" value="history"/>
                    <param name="bowtie2db" value="test-db.fasta"/>
                    <param name="mpa_pkl" value="test-db.json"/>
                </conditional>
            </section>
            <section name="analysis">
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unknown_estimation" value="false"/>
            </section>
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                    <has_text text="relative_abundance"/>
                    <has_text text="NCBI_tax_id"/>
                    <has_text text="clade_name"/>
                </assert_contents>
            </output>
            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                    <has_text text="37637__U2I1U8__N579_01580"/>
                </assert_contents>
            </output>
            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares-two-inputs.sam" compare="sim_size">
                <assert_contents>
                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="4">
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <!-- Paired GZ file -->
                        <param name="selector" value="paired"/>
                        <param name="in_f" value="SRS014464-Anterior_nares.fasta.gz"/>
                        <param name="in_r" value="SRS014464-Anterior_nares.fasta.gz"/>
                    </conditional>
                    <param name="read_min_len" value="70"/>
                    <section name="mapping">
                        <param name="bt2_ps" value="sensitive"/>
                        <param name="min_mapq_val" value="5"/>
                    </section>
                </conditional>
                <conditional name="db">
                    <!-- Cached db -->
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <section name="analysis">
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unknown_estimation" value="false"/>
            </section>
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                    <has_text text="relative_abundance"/>
                    <has_text text="NCBI_tax_id"/>
                    <has_text text="clade_name"/>
                </assert_contents>
            </output>
            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                    <has_text text="37637__U2I1U8__N579_01580"/>
                </assert_contents>
            </output>
            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares-two-inputs.sam" compare="sim_size">
                <assert_contents>
                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <section name="inputs">
                <conditional name="in">
                    <!-- SAM -->
                    <param name="selector" value="sam"/>
                    <param name="in" value="SRS014464-Anterior_nares.sam"/>
                </conditional>
                <conditional name="db">
                    <!-- Cached db -->
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <section name="analysis">
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unknown_estimation" value="false"/>
            </section>
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                    <has_text text="relative_abundance"/>
                    <has_text text="NCBI_tax_id"/>
                    <has_text text="clade_name"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <section name="inputs">
                <conditional name="in">
                    <!-- bowtie2out -->
                    <param name="selector" value="bowtie2out"/>
                    <param name="in" value="SRS014464-Anterior_nares-bowtie2out.tabular"/>
                </conditional>
                <conditional name="db">
                    <!-- Cached db -->
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <section name="analysis">
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="false"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unknown_estimation" value="false"/>
            </section>
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                    <has_text text="relative_abundance"/>
                    <has_text text="NCBI_tax_id"/>
                    <has_text text="clade_name"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="4">
            <section name="inputs">
                <conditional name="in">
                    <param name="selector" value="raw"/>
                    <conditional name="raw_in">
                        <!-- Single FASTA file -->
                        <param name="selector" value="single"/>
                        <param name="in" value="SRS014464-Anterior_nares.fasta"/>
                    </conditional>
                    <param name="read_min_len" value="70"/>
                    <section name="mapping">
                        <param name="bt2_ps" value="sensitive"/>
                        <param name="min_mapq_val" value="5"/>
                    </section>
                </conditional>
                <conditional name="db">
                    <!-- Cached db -->
                    <param name="db_selector" value="cached"/>
                    <param name="cached_db" value="test-db-20210409"/>
                </conditional>
            </section>
            <section name="analysis">
                <param name="min_cu_len" value="2000"/>
                <param name="organism_profiling" value="add_viruses"/>
                <param name="stat" value="avg_g"/>
                <param name="stat_q" value="0.2"/>
                <param name="perc_nonzero" value="0.33"/>
                <param name="ignore_markers" value="marker.txt"/>
                <param name="avoid_disqm" value="true"/>
            </section>
            <section name="out">
                <param name="sample_id_key" value="SampleID"/>
                <param name="sample_id" value="Metaphlan_Analysis"/>
                <param name="use_group_representative" value="false"/>
                <param name="legacy_output" value="true"/>
                <param name="CAMI_format_output" value="false"/>
                <param name="unknown_estimation" value="false"/>
            </section>
            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                    <has_text text="SampleID"/>
                    <has_text text="Metaphlan_Analysis"/>
                </assert_contents>
            </output>
            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size">
                <assert_contents>
                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                    <has_text text="37637__U2I1U8__N579_01580"/>
                </assert_contents>
            </output>
            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size">
                <assert_contents>
                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                </assert_contents>
            </output>
            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
                <assert_contents>
                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
What it does
============

MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria,
Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) with species-level resolution.

MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes
(~99,500 bacterial and archaeal and ~500 eukaryotic), allowing:

- unambiguous taxonomic assignments;
- accurate estimation of organismal relative abundance;
- species-level resolution for bacteria, archaea, eukaryotes and viruses;
- strain identification and tracking
- orders of magnitude speedups compared to existing methods.
- metagenomic strain-level population genomics

MetaPhlAn clade-abundance estimation
------------------------------------

The basic usage of MetaPhlAn consists in the identification of the clades (from phyla to species and
strains in particular cases) present in the metagenome obtained from a microbiome sample and their
relative abundance.

Marker level analysis
---------------------

MetaPhlAn introduces the capability of characterizing organisms at the strain level using non
aggregated marker information. Such capability comes with several slightly different flavours and
are a way to perform strain tracking and comparison across multiple samples.

Usually, MetaPhlAn is first run with the default parameter for the type of analysis to profile the
species present in the community, and then a strain-level profiling can be performed to zoom-in into
specific species of interest. This operation can be performed quickly as it exploits the bowtie2out
intermediate file saved during the execution of the default analysis type.

Inputs
======

Metaphlan takes as input either:

- one or several sequence files in Fasta, FastQ (compressed or not)
- a BowTie2 produced SAM file
- an intermediary mapping file of the metagenome generated by a previous MetaPhlAn run

It also needs the reference database, which can be locally installed or customized using the dedicated tools.

Outputs
=======

The main output file is a tab-separated file with the predicted taxon relative abundances.

It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs.


More help and use cases
=======================

To get more information about MetaPhlAn usage and use cases, please refer to the `Metaphlan documentation`_.

.. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.0#basic-usage

    ]]></help>
    <expand macro="citations"/>
</tool>