diff metaphlan.xml @ 16:56ff60ec416d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/metaphlan commit 26cbf16569d13b7ddbbbd99fc49ec15ce312a992
author iuc
date Fri, 14 Nov 2025 13:47:10 +0000
parents eca2e2e20436
children
line wrap: on
line diff
--- a/metaphlan.xml	Fri Feb 21 21:30:21 2025 +0000
+++ b/metaphlan.xml	Fri Nov 14 13:47:10 2025 +0000
@@ -1,4 +1,4 @@
-<tool id="metaphlan" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy4" profile="@PROFILE@">
+<tool id="metaphlan" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <description>to profile the composition of microbial communities</description>
     <macros>
         <import>macros.xml</import>
@@ -148,41 +148,31 @@
     #end if
 #end if
 #if $inputs.db.db_selector == "cached"
-    --bowtie2db '$inputs.db.cached_db.fields.path'
+    --db_dir '$inputs.db.cached_db.fields.path'
     --index '$inputs.db.cached_db.fields.dbkey'
+    #if $inputs.db.viral_analysis.profile_vsc
+        $inputs.db.viral_analysis.profile_vsc
+        --vsc_out '$vcs_breath_coverage'
+        --vsc_breadth $inputs.db.viral_analysis.vsc_breadth
+    #end if
 #else
-    --bowtie2db 'ref_db/'
+    --db_dir 'ref_db/'
     --index 'custom_db'
 #end if
     -t '$analysis.analysis_type.t'
 #if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats"
     --tax_lev '$analysis.analysis_type.tax_lev.tax_lev'
-#else if $analysis.analysis_type.t == "clade_specific_strain_tracker"
-    --clade '$analysis.analysis_type.clade'
-    #if str($analysis.analysis_type.min_ab) != ''
-    --min_ab $analysis.analysis_type.min_ab
-    #end if
 #else if $analysis.analysis_type.t == "marker_ab_table" and str($analysis.analysis_type.nreads) != ''
     --nreads $$analysis.analysis_type.nreads
 #else if $analysis.analysis_type.t == "marker_pres_table" and str($analysis.analysis_type.pres_th) != ''
     --pres_th $analysis.analysis_type.pres_th
 #end if
-    --min_cu_len $analysis.min_cu_len
 #if str($analysis.min_alignment_len) != ''
     --min_alignment_len $analysis.min_alignment_len
 #end if
-#if 'add_viruses' in $analysis.organism_profiling
-    --add_viruses
-#end if
-#if 'ignore_eukaryotes' in $analysis.organism_profiling
-    --ignore_eukaryotes
-#end if
-#if 'ignore_bacteria' in $analysis.organism_profiling
-    --ignore_bacteria
-#end if
-#if 'ignore_archaea' in $analysis.organism_profiling
-    --ignore_archaea
-#end if
+
+#echo " ".join(["--" + o for o in $analysis.organism_profiling])
+
     --stat $analysis.stat
     --stat_q $analysis.stat_q
     --perc_nonzero $analysis.perc_nonzero
@@ -193,19 +183,17 @@
     --sample_id_key '$out.sample_id_key'
     --sample_id '$out.sample_id'
     $out.use_group_representative
-    $out.legacy_output
     $out.CAMI_format_output
-    $out.unclassified_estimation
-    -o '$output_file'
-    --bowtie2out 'bowtie2out'
+    $out.skip_unclassified_estimation
+    #if $out.biom_format_output
+        $out.biom_format_output
+        -o '$biom_output_file'
+    #else
+        -o '$output_file'
+    #end if
+    --mapout 'mapout'
     -s '$sam_output_file'
-    --biom '$biom_output_file'
     --nproc \${GALAXY_SLOTS:-4}
-#if $viral_analysis.profile_vsc
-    $viral_analysis.profile_vsc
-    --vsc_out '$vcs_breath_coverage'
-    --vsc_breadth $viral_analysis.vsc_breadth
-#end if
 
 #if $subsample.selector != "no"
     #if $subsample.selector == "single"
@@ -226,7 +214,7 @@
 
 #if $inputs.in.selector == "raw"
 &&
-mv 'bowtie2out' '$bowtie2out'
+mv 'mapout' '$mapout'
 #end if
 
 
@@ -239,7 +227,6 @@
             split_levels
             --metaphlan_output '$output_file'
             --outdir 'split_levels'
-            $out.legacy_output
     #end if
 #end if
 
@@ -257,7 +244,7 @@
                 <param name="selector" type="select" label="Input(s)">
                     <option value="raw" selected="true">Fasta/FastQ file(s) with microbiota reads</option>
                     <option value="sam">Externally BowTie2-mapped SAM file</option>
-                    <option value="bowtie2out">Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run</option>
+                    <option value="mapout">Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run</option>
                 </param>
                 <when value="raw">
                     <conditional name="raw_in">
@@ -295,7 +282,7 @@
                 <when value="sam">
                     <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map microbiota reads"/>
                 </when>
-                <when value="bowtie2out">
+                <when value="mapout">
                     <param name="in" type="data" format="tabular" label="Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run" help="File needs to be generated with MetaPhlAn versions &gt;3.0"/>
                 </when>
             </conditional>
@@ -311,9 +298,19 @@
                             <validator message="No compatible MetaPhlAn database is available" type="no_options"/>
                         </options>
                     </param>
+                    <conditional name="viral_analysis">
+                        <param argument="--profile_vsc" type="select" label="Profile Viruses with VSCs approach">
+                            <option value="--profile_vsc">Yes (requires FASTQ input and reference data with VSG fasta)</option>
+                            <option value="" selected="true">No</option>
+                        </param>
+                        <when value="--profile_vsc">
+                            <param argument="--vsc_breadth" type="float" min="0" max="1" value="0.75" label="Minimum Breadth of Coverage" help="Minimum coverage (fraction) for a Viral Group to be reported."/>
+                        </when>
+                        <when value=""/>
+                    </conditional>
                 </when>
                 <when value="history">
-                    <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/>
+                    <param argument="--db_dir" name="bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/>
                     <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated to the database with clade-specific marker genes from history"/>
                 </when>
             </conditional>
@@ -323,11 +320,8 @@
                 <param argument="-t" type="select" label="Type of analysis to perform">
                     <option value="rel_ab" selected="true">rel_ab: Profiling a microbiota in terms of relative abundances</option>
                     <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a microbiota in terms of relative abundances and estimate the number of reads coming from each clade</option>
-                    <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option>
                     <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option>
-                    <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option>
                     <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when &gt; 0.0 and normalized by microbiota size if number of reads is specified)</option>
-                    <option value="marker_counts">marker_counts: Non-normalized marker counts (use with extreme caution)</option>
                     <option value="marker_pres_table">marker_pres_table: List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th)</option>
                 </param>
                 <when value="rel_ab">
@@ -336,27 +330,21 @@
                 <when value="rel_ab_w_read_stats">
                     <expand macro="tax_lev"/>
                 </when>
-                <when value="reads_map"/>
                 <when value="clade_profiles"/>
-                <when value="clade_specific_strain_tracker">
-                    <param argument="--clade" type="text" value="" label="Clade for which to extract list of markers present" help="Markers are also extracted for subclades"/>
-                    <param argument="--min_ab" type="float" optional="true" label="The minimum percentage abundance for the clade"/>
-                </when>
                 <when value="marker_ab_table">
                     <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original microbiota" help="It is used for normalizing the length-normalized counts with the microbiota size as well. No normalization applied if the value is not specified"/>
                 </when>
-                <when value="marker_counts"/>
                 <when value="marker_pres_table">
                     <param argument="--pres_th" type="integer" optional="true" label="Threshold for calling a marker present"/>
                 </when>
             </conditional>
-            <param argument="--min_cu_len" type="integer" value="2000" label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/>
             <param argument="--min_alignment_len" type="integer" optional="true" label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/>
             <param name="organism_profiling" type="select" optional="true" label="Organisms to profile" multiple="true">
-                <option value="add_viruses" selected="true">Profile viral organisms (add_viruses)</option>
                 <option value="ignore_eukaryotes">Ignore eukaryotic organisms (ignore_eukaryotes)</option>
                 <option value="ignore_bacteria">Ignore bacterial organisms (ignore_bacteria)</option>
                 <option value="ignore_archaea">Ignore archaeal organisms (ignore_archaea)</option>
+                <option value="ignore_ksgbs">Do not profile known SGBs</option>
+                <option value="ignore_usgbs">Do not profile unknown SGBs</option>
             </param>
             <param argument="--stat" type="select" label="Statistical approach for converting marker abundances into clade abundances">
                 <option value="avg_g">avg_g: Clade global (i.e. normalizing all markers together) average (avg_g)</option>
@@ -388,37 +376,31 @@
                 <expand macro="subsample_common"/>
             </when>
         </conditional>
-        <conditional name="viral_analysis">
-            <param argument="--profile_vsc" type="select" label="Profile Viruses with VSCs approach">
-                <option value="--profile_vsc">Yes (requires FASTQ input and reference data with VSG fasta)</option>
-                <option value="" selected="true">No</option>
-            </param>
-            <when value="--profile_vsc">
-                <param argument="--vsc_breadth" type="float" min="0" max="1" value="0.75" label="Minimum Breadth of Coverage" help="Minimum coverage (fraction) for a Viral Group to be reported."/>
-            </when>
-            <when value=""/>
-        </conditional>
         <section name="out" title="Outputs" expanded="true">
             <param argument="--sample_id_key" type="text" value="SampleID" label="Sample ID key for this analysis"/>
             <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/>
             <param argument="--use_group_representative" type="boolean" truevalue="--use_group_representative" falsevalue="" checked="false" label="Use a species as representative for species groups?"/>
-            <param argument="--legacy-output" type="boolean" truevalue="--legacy-output" falsevalue="" checked="false" label="Old MetaPhlAn2 two columns output?"/>
             <param argument="--CAMI_format_output" type="boolean" truevalue="--CAMI_format_output" falsevalue="" checked="false" label="Report the profiling using the CAMI output format?"/>
-            <param argument="--unclassified_estimation" type="boolean" truevalue="--unclassified_estimation" falsevalue="" checked="false" label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/>
+            <param argument="--skip_unclassified_estimation" type="boolean" truevalue="--skip_unclassified_estimation" falsevalue="" checked="false" label="Do not scale relative abundances to estimate unclassified taxa"/>
+            <param argument="--biom_format_output" type="boolean" truevalue="--biom_format_output" falsevalue="" checked="false" label="Report the profiling using the biom output format"/>
             <param name="krona_output" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output for Krona?"/>
         </section>
         <!-- enabling this in tests will allow metaphlan to download reference data (we do this only with the smallish TOY DB) -->
         <param name="test" type="hidden" value="false"/>
     </inputs>
     <outputs>
-        <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances"/>
-        <data name="bowtie2out" format="tabular" label="${tool.name} on ${on_string}: Bowtie2 output">
+        <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances">
+            <filter>not out['biom_format_output']</filter>
+        </data>
+        <data name="mapout" format="tabular" label="${tool.name} on ${on_string}: Bowtie2 output">
             <filter>inputs['in']['selector'] == "raw"</filter>
         </data>
         <data name="sam_output_file" format="sam" label="${tool.name} on ${on_string}: SAM file">
             <filter>inputs['in']['selector'] == "raw"</filter>
         </data>
-        <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file"/>
+        <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file">
+            <filter>out['biom_format_output']</filter>
+        </data>
         <collection name="levels" type="list" label="${tool.name} on ${on_string}: Predicted taxon relative abundances at each taxonomic levels">
             <discover_datasets pattern="(?P&lt;designation&gt;.+)" directory="split_levels/" format="tabular"/>
             <filter>analysis['analysis_type']['t'] in ['rel_ab', 'rel_ab_w_read_stats'] and analysis['analysis_type']['tax_lev']['tax_lev'] == "a" and analysis['analysis_type']['tax_lev']['split_levels']</filter>
@@ -427,7 +409,7 @@
             <filter>out['krona_output']</filter>
         </data>
         <data name="vcs_breath_coverage" format="tabular" label="${tool.name} on ${on_string}: VSCs breadth-of-coverage">
-            <filter>viral_analysis['profile_vsc']</filter>
+            <filter>inputs['db']['db_selector'] == "cached" and inputs['db']['viral_analysis']['profile_vsc'] != ""</filter>
         </data>
         <data name="subsample_single" format="fastqsanger" from_work_dir="subsampled.out" label="${tool.name} on ${on_string}: Subsampled reads">
             <filter>subsample['selector'] == 'single'</filter>
@@ -440,7 +422,7 @@
     </outputs>
     <tests>
         <!-- Single GZ file, Cached db -->
-        <test expect_num_outputs="6">
+        <test expect_num_outputs="5">
             <section name="inputs">
                 <conditional name="in">
                     <param name="selector" value="raw"/>
@@ -467,8 +449,6 @@
                         <param name="split_levels" value="true"/>
                     </conditional>
                 </conditional>
-                <param name="min_cu_len" value="2000"/>
-                <param name="organism_profiling" value="add_viruses"/>
                 <param name="stat" value="avg_g"/>
                 <param name="stat_q" value="0.2"/>
                 <param name="perc_nonzero" value="0.33"/>
@@ -478,9 +458,8 @@
                 <param name="sample_id_key" value="SampleID"/>
                 <param name="sample_id" value="Metaphlan_Analysis"/>
                 <param name="use_group_representative" value="false"/>
-                <param name="legacy_output" value="false"/>
                 <param name="CAMI_format_output" value="false"/>
-                <param name="unclassified_estimation" value="false"/>
+                <param name="skip_unclassified_estimation" value="false"/>
                 <param name="krona_output" value="true"/>
             </section>
             <output name="output_file" ftype="tabular">
@@ -488,7 +467,7 @@
                     <has_text text="UNCLASSIFIED"/>
                 </assert_contents>
             </output>
-            <output name="bowtie2out" ftype="tabular">
+            <output name="mapout" ftype="tabular">
                 <assert_contents>
                     <not_has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                     <has_n_lines n="2"/>
@@ -499,12 +478,6 @@
                     <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                 </assert_contents>
             </output>
-            <output name="biom_output_file" ftype="biom1">
-                <assert_contents>
-                    <not_has_text text="k__Bacteria"/>
-                    <not_has_text text="p__Actinobacteria"/>
-                </assert_contents>
-            </output>
             <output_collection name="levels" type="list">
                 <element name="all" ftype="tabular">
                     <assert_contents>
@@ -589,7 +562,7 @@
             </assert_stderr>
         </test>
         <!-- Single GZ file, Cached db -->
-        <test expect_num_outputs="6">
+        <test expect_num_outputs="5">
             <section name="inputs">
                 <conditional name="in">
                     <param name="selector" value="raw"/>
@@ -616,8 +589,6 @@
                         <param name="split_levels" value="true"/>
                     </conditional>
                 </conditional>
-                <param name="min_cu_len" value="2000"/>
-                <param name="organism_profiling" value="add_viruses"/>
                 <param name="stat" value="avg_g"/>
                 <param name="stat_q" value="0.2"/>
                 <param name="perc_nonzero" value="0.33"/>
@@ -627,32 +598,29 @@
                 <param name="sample_id_key" value="SampleID"/>
                 <param name="sample_id" value="Metaphlan_Analysis"/>
                 <param name="use_group_representative" value="false"/>
-                <param name="legacy_output" value="false"/>
                 <param name="CAMI_format_output" value="false"/>
-                <param name="unclassified_estimation" value="false"/>
+                <param name="skip_unclassified_estimation" value="false"/>
                 <param name="krona_output" value="true"/>
             </section>
-            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+            <output name="output_file" ftype="tabular">
                 <assert_contents>
+                    <has_size size="12357"/>
                     <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                 </assert_contents>
             </output>
-            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size">
+            <output name="mapout" ftype="tabular">
                 <assert_contents>
+                    <has_size size="10504"/>
                     <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                     <has_text text="37637__U2I1U8__N579_01580"/>
                 </assert_contents>
             </output>
-            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size">
+            <output name="sam_output_file" ftype="sam">
                 <assert_contents>
+                    <has_size size="54554"/>
                     <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                 </assert_contents>
             </output>
-            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
-                <assert_contents>
-                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
-                </assert_contents>
-            </output>
             <output_collection name="levels" type="list">
                 <element name="all" ftype="tabular">
                     <assert_contents>
@@ -736,7 +704,7 @@
             </assert_stderr>
         </test>
         <!-- Multiple GZ file, Local db-->
-        <test expect_num_outputs="4">
+        <test expect_num_outputs="3">
             <section name="inputs">
                 <conditional name="in">
                     <param name="selector" value="raw"/>
@@ -764,8 +732,6 @@
                         <param name="split_levels" value="false"/>
                     </conditional>
                 </conditional>
-                <param name="min_cu_len" value="2000"/>
-                <param name="organism_profiling" value="add_viruses"/>
                 <param name="stat" value="avg_g"/>
                 <param name="stat_q" value="0.2"/>
                 <param name="perc_nonzero" value="0.33"/>
@@ -775,41 +741,38 @@
                 <param name="sample_id_key" value="SampleID"/>
                 <param name="sample_id" value="Metaphlan_Analysis"/>
                 <param name="use_group_representative" value="false"/>
-                <param name="legacy_output" value="false"/>
                 <param name="CAMI_format_output" value="false"/>
-                <param name="unclassified_estimation" value="false"/>
+                <param name="skip_unclassified_estimation" value="false"/>
                 <param name="krona_output" value="false"/>
             </section>
-            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+            <output name="output_file" ftype="tabular">
                 <assert_contents>
+                    <has_size size="12298"/>
                     <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                     <has_text text="relative_abundance"/>
                     <has_text text="NCBI_tax_id"/>
                     <has_text text="clade_name"/>
                 </assert_contents>
             </output>
-            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size">
+            <output name="mapout" ftype="tabular">
                 <assert_contents>
+                    <has_size size="20961"/>
                     <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                     <has_text text="37637__U2I1U8__N579_01580"/>
                 </assert_contents>
             </output>
-            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares-two-inputs.sam" compare="sim_size">
+            <output name="sam_output_file" ftype="sam">
                 <assert_contents>
+                    <has_size size="104838"/>
                     <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                 </assert_contents>
             </output>
-            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
-                <assert_contents>
-                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
-                </assert_contents>
-            </output>
             <assert_stderr>
                 <has_text text="Downloading" negate="true"/>
             </assert_stderr>
         </test>
         <!-- Paired GZ file, Cached db (note subsample_paired and the included forward and reverse reads are counted separately, because they are all statically defined) -->
-        <test expect_num_outputs="7">
+        <test expect_num_outputs="6">
             <section name="inputs">
                 <conditional name="in">
                     <param name="selector" value="raw"/>
@@ -837,8 +800,6 @@
                         <param name="split_levels" value="false"/>
                     </conditional>
                 </conditional>
-                <param name="min_cu_len" value="2000"/>
-                <param name="organism_profiling" value="add_viruses"/>
                 <param name="stat" value="avg_g"/>
                 <param name="stat_q" value="0.2"/>
                 <param name="perc_nonzero" value="0.33"/>
@@ -853,21 +814,22 @@
                 <param name="sample_id_key" value="SampleID"/>
                 <param name="sample_id" value="Metaphlan_Analysis"/>
                 <param name="use_group_representative" value="false"/>
-                <param name="legacy_output" value="false"/>
                 <param name="CAMI_format_output" value="false"/>
-                <param name="unclassified_estimation" value="false"/>
+                <param name="skip_unclassified_estimation" value="false"/>
                 <param name="krona_output" value="false"/>
             </section>
-            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+            <output name="output_file" ftype="tabular">
                 <assert_contents>
+                    <has_size size="9808"/>
                     <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                     <has_text text="relative_abundance"/>
                     <has_text text="NCBI_tax_id"/>
                     <has_text text="clade_name"/>
                 </assert_contents>
             </output>
-            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size">
+            <output name="mapout" ftype="tabular">
                 <assert_contents>
+                    <has_size size="52535"/>
                     <has_text text="HWI-EAS109_102883399:3:107:9938:7093/1"/>
                     <has_text text="90240__A0A378QWM4__NCTC12877_00123"/>
                 </assert_contents>
@@ -878,11 +840,6 @@
                     <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                 </assert_contents>
             </output>
-            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
-                <assert_contents>
-                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
-                </assert_contents>
-            </output>
             <output_collection name="subsample_paired" type="paired">
                 <element name="forward">
                     <assert_contents>
@@ -900,7 +857,7 @@
             </assert_stderr>
         </test>
         <!-- Paired GZ file as collection, Cached db (note subsample_paired and the included forward and reverse reads are counted separately, because they are all statically defined) -->
-        <test expect_num_outputs="7">
+        <test expect_num_outputs="6">
             <section name="inputs">
                 <conditional name="in">
                     <param name="selector" value="raw"/>
@@ -932,8 +889,6 @@
                         <param name="split_levels" value="false"/>
                     </conditional>
                 </conditional>
-                <param name="min_cu_len" value="2000"/>
-                <param name="organism_profiling" value="add_viruses"/>
                 <param name="stat" value="avg_g"/>
                 <param name="stat_q" value="0.2"/>
                 <param name="perc_nonzero" value="0.33"/>
@@ -948,21 +903,22 @@
                 <param name="sample_id_key" value="SampleID"/>
                 <param name="sample_id" value="Metaphlan_Analysis"/>
                 <param name="use_group_representative" value="false"/>
-                <param name="legacy_output" value="false"/>
                 <param name="CAMI_format_output" value="false"/>
-                <param name="unclassified_estimation" value="false"/>
+                <param name="skip_unclassified_estimation" value="false"/>
                 <param name="krona_output" value="false"/>
             </section>
-            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+            <output name="output_file" ftype="tabular">
                 <assert_contents>
+                    <has_size size="9808"/>
                     <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                     <has_text text="relative_abundance"/>
                     <has_text text="NCBI_tax_id"/>
                     <has_text text="clade_name"/>
                 </assert_contents>
             </output>
-            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size">
+            <output name="mapout" ftype="tabular">
                 <assert_contents>
+                    <has_size size="52535"/>
                     <has_text text="HWI-EAS109_102883399:3:107:9938:7093/1"/>
                     <has_text text="90240__A0A378QWM4__NCTC12877_00123"/>
                 </assert_contents>
@@ -973,11 +929,6 @@
                     <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                 </assert_contents>
             </output>
-            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
-                <assert_contents>
-                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
-                </assert_contents>
-            </output>
             <output_collection name="subsample_paired" type="paired">
                 <element name="forward">
                     <assert_contents>
@@ -995,7 +946,7 @@
             </assert_stderr>
         </test>
         <!-- Paired fastq file as collection, Cached db -->
-        <test expect_num_outputs="4">
+        <test expect_num_outputs="3">
             <section name="inputs">
                 <conditional name="in">
                     <param name="selector" value="raw"/>
@@ -1027,8 +978,6 @@
                         <param name="split_levels" value="false"/>
                     </conditional>
                 </conditional>
-                <param name="min_cu_len" value="2000"/>
-                <param name="organism_profiling" value="add_viruses"/>
                 <param name="stat" value="avg_g"/>
                 <param name="stat_q" value="0.2"/>
                 <param name="perc_nonzero" value="0.33"/>
@@ -1041,21 +990,22 @@
                 <param name="sample_id_key" value="SampleID"/>
                 <param name="sample_id" value="Metaphlan_Analysis"/>
                 <param name="use_group_representative" value="false"/>
-                <param name="legacy_output" value="false"/>
                 <param name="CAMI_format_output" value="false"/>
-                <param name="unclassified_estimation" value="false"/>
+                <param name="skip_unclassified_estimation" value="false"/>
                 <param name="krona_output" value="false"/>
             </section>
-            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+            <output name="output_file" ftype="tabular">
                 <assert_contents>
-                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
-                    <has_text text="relative_abundance"/>
-                    <has_text text="NCBI_tax_id"/>
-                    <has_text text="clade_name"/>
-                </assert_contents>
-            </output>
-            <output name="bowtie2out" ftype="tabular">
+                    <has_size size="12356"/>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                    <has_text text="relative_abundance"/>
+                    <has_text text="NCBI_tax_id"/>
+                    <has_text text="clade_name"/>
+                </assert_contents>
+            </output>
+            <output name="mapout" ftype="tabular">
                 <assert_contents>
+                    <has_size size="20961"/>
                     <has_text text="HWI-EAS109_102883399:3:107:9938:7093/1"/>
                     <has_text text="90240__A0A378QWM4__NCTC12877_00123"/>
                 </assert_contents>
@@ -1066,17 +1016,12 @@
                     <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                 </assert_contents>
             </output>
-            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
-                <assert_contents>
-                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
-                </assert_contents>
-            </output>
             <assert_stderr>
                 <has_text text="Downloading" negate="true"/>
             </assert_stderr>
         </test>
         <!-- SAM, cached DB -->
-        <test expect_num_outputs="2">
+        <test expect_num_outputs="1">
             <section name="inputs">
                 <conditional name="in">
                     <param name="selector" value="sam"/>
@@ -1095,8 +1040,6 @@
                         <param name="split_levels" value="false"/>
                     </conditional>
                 </conditional>
-                <param name="min_cu_len" value="2000"/>
-                <param name="organism_profiling" value="add_viruses"/>
                 <param name="stat" value="avg_g"/>
                 <param name="stat_q" value="0.2"/>
                 <param name="perc_nonzero" value="0.33"/>
@@ -1106,34 +1049,29 @@
                 <param name="sample_id_key" value="SampleID"/>
                 <param name="sample_id" value="Metaphlan_Analysis"/>
                 <param name="use_group_representative" value="false"/>
-                <param name="legacy_output" value="false"/>
                 <param name="CAMI_format_output" value="false"/>
-                <param name="unclassified_estimation" value="false"/>
+                <param name="skip_unclassified_estimation" value="false"/>
                 <param name="krona_output" value="false"/>
             </section>
-            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+            <output name="output_file" ftype="tabular">
                 <assert_contents>
+                    <has_size size="12281"/>
                     <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                     <has_text text="relative_abundance"/>
                     <has_text text="NCBI_tax_id"/>
                     <has_text text="clade_name"/>
                 </assert_contents>
             </output>
-            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
-                <assert_contents>
-                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
-                </assert_contents>
-            </output>
             <assert_stderr>
                 <has_text text="Downloading" negate="true"/>
             </assert_stderr>
         </test>
-        <!-- bowtie2out, cached DB -->
-        <test expect_num_outputs="2">
+        <!-- mapout, cached DB -->
+        <test expect_num_outputs="1">
             <section name="inputs">
                 <conditional name="in">
-                    <param name="selector" value="bowtie2out"/>
-                    <param name="in" value="SRS014464-Anterior_nares-bowtie2out.tabular"/>
+                    <param name="selector" value="mapout"/>
+                    <param name="in" value="SRS014464-Anterior_nares-mapout.tabular"/>
                 </conditional>
                 <conditional name="db">
                     <param name="db_selector" value="cached"/>
@@ -1148,8 +1086,6 @@
                         <param name="split_levels" value="false"/>
                     </conditional>
                 </conditional>
-                <param name="min_cu_len" value="2000"/>
-                <param name="organism_profiling" value="add_viruses"/>
                 <param name="stat" value="avg_g"/>
                 <param name="stat_q" value="0.2"/>
                 <param name="perc_nonzero" value="0.33"/>
@@ -1159,30 +1095,25 @@
                 <param name="sample_id_key" value="SampleID"/>
                 <param name="sample_id" value="Metaphlan_Analysis"/>
                 <param name="use_group_representative" value="false"/>
-                <param name="legacy_output" value="false"/>
                 <param name="CAMI_format_output" value="false"/>
-                <param name="unclassified_estimation" value="false"/>
+                <param name="skip_unclassified_estimation" value="false"/>
                 <param name="krona_output" value="false"/>
             </section>
-            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+            <output name="output_file" ftype="tabular">
                 <assert_contents>
+                    <has_size size="12273"/>
                     <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                     <has_text text="relative_abundance"/>
                     <has_text text="NCBI_tax_id"/>
                     <has_text text="clade_name"/>
                 </assert_contents>
             </output>
-            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
-                <assert_contents>
-                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
-                </assert_contents>
-            </output>
             <assert_stderr>
                 <has_text text="Downloading" negate="true"/>
             </assert_stderr>
         </test>
         <!-- Single FASTA file, Cached db -->
-        <test expect_num_outputs="6">
+        <test expect_num_outputs="5">
             <section name="inputs">
                 <conditional name="in">
                     <param name="selector" value="raw"/>
@@ -1209,8 +1140,6 @@
                         <param name="split_levels" value="true"/>
                     </conditional>
                 </conditional>
-                <param name="min_cu_len" value="2000"/>
-                <param name="organism_profiling" value="add_viruses"/>
                 <param name="stat" value="avg_g"/>
                 <param name="stat_q" value="0.2"/>
                 <param name="perc_nonzero" value="0.33"/>
@@ -1221,95 +1150,92 @@
                 <param name="sample_id_key" value="SampleID"/>
                 <param name="sample_id" value="Metaphlan_Analysis"/>
                 <param name="use_group_representative" value="false"/>
-                <param name="legacy_output" value="true"/>
                 <param name="CAMI_format_output" value="false"/>
-                <param name="unclassified_estimation" value="false"/>
+                <param name="skip_unclassified_estimation" value="false"/>
                 <param name="krona_output" value="true"/>
             </section>
-            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size">
+            <output name="output_file" ftype="tabular">
                 <assert_contents>
+                    <has_size size="11869"/>
                     <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
                     <has_text text="SampleID"/>
                     <has_text text="Metaphlan_Analysis"/>
                 </assert_contents>
             </output>
-            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size">
+            <output name="mapout" ftype="tabular">
                 <assert_contents>
+                    <has_size size="10504"/>
                     <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
                     <has_text text="37637__U2I1U8__N579_01580"/>
                 </assert_contents>
             </output>
-            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size">
+            <output name="sam_output_file" ftype="sam">
                 <assert_contents>
+                    <has_size size="54554"/>
                     <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
                 </assert_contents>
             </output>
-            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
-                <assert_contents>
-                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
-                </assert_contents>
-            </output>
             <output_collection name="levels" type="list">
                 <element name="all" ftype="tabular">
                     <assert_contents>
                         <has_text text="Gammaproteobacteria"/>
                         <has_text text="Corynebacterium accolens"/>
-                        <has_n_columns n="9"/>
+                        <has_n_columns n="17"/>
                     </assert_contents>
                 </element>
                 <element name="class" ftype="tabular">
                     <assert_contents>
                         <has_text text="class"/>
                         <has_text text="Actinobacteria"/>
-                        <has_n_columns n="2"/>
+                        <has_n_columns n="3"/>
                     </assert_contents>
                 </element>
                 <element name="family" ftype="tabular">
                     <assert_contents>
                         <has_text text="family"/>
                         <has_text text="Propionibacteriaceae"/>
-                        <has_n_columns n="2"/>
+                        <has_n_columns n="3"/>
                     </assert_contents>
                 </element>
                 <element name="genus" ftype="tabular">
                     <assert_contents>
                         <has_text text="genus"/>
                         <has_text text="Cutibacterium"/>
-                        <has_n_columns n="2"/>
+                        <has_n_columns n="3"/>
                     </assert_contents>
                 </element>
                 <element name="kingdom" ftype="tabular">
                     <assert_contents>
                         <has_text text="kingdom"/>
                         <has_text text="Bacteria"/>
-                        <has_n_columns n="2"/>
+                        <has_n_columns n="3"/>
                     </assert_contents>
                 </element>
                 <element name="order" ftype="tabular">
                     <assert_contents>
                         <has_text text="order"/>
                         <has_text text="Propionibacteriales"/>
-                        <has_n_columns n="2"/>
+                        <has_n_columns n="3"/>
                     </assert_contents>
                 </element>
                 <element name="phylum" ftype="tabular">
                     <assert_contents>
                         <has_text text="phylum"/>
                         <has_text text="Firmicutes"/>
-                        <has_n_columns n="2"/>
+                        <has_n_columns n="3"/>
                     </assert_contents>
                 </element>
                 <element name="species" ftype="tabular">
                     <assert_contents>
                         <has_text text="species"/>
                         <has_text text="Corynebacterium accolens"/>
-                        <has_n_columns n="2"/>
+                        <has_n_columns n="3"/>
                     </assert_contents>
                 </element>
                 <element name="strains" ftype="tabular">
                     <assert_contents>
                         <has_text text="strains"/>
-                        <has_n_columns n="2"/>
+                        <has_n_columns n="3"/>
                     </assert_contents>
                 </element>
             </output_collection>
@@ -1326,7 +1252,7 @@
         </test>
         <!-- Check a non-default analysis mode 
              and viral analysis -->
-        <test expect_num_outputs="6">
+        <test expect_num_outputs="5">
             <section name="inputs">
                 <conditional name="in">
                     <param name="selector" value="raw"/>
@@ -1338,6 +1264,9 @@
                 <conditional name="db">
                     <param name="db_selector" value="cached"/>
                     <param name="cached_db" value="mpa_vJan21_TOY_CHOCOPhlAnSGB"/>
+                    <conditional name="viral_analysis">
+                        <param name="profile_vsc" value="--profile_vsc"/>
+                    </conditional>
                 </conditional>
             </section>
             <section name="analysis">
@@ -1345,17 +1274,15 @@
                     <param name="t" value="marker_ab_table"/>
                 </conditional>
             </section>
-            <conditional name="viral_analysis">
-                <param name="profile_vsc" value="--profile_vsc"/>
-            </conditional>
             <conditional name="subsample">
                 <param name="selector" value="single"/>
                 <param name="subsampling" value="10000"/>
                 <param name="subsampling_seed" value="42"/>
             </conditional>
             <param name="test" value="true"/>
-            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size">
+            <output name="output_file" ftype="tabular">
                 <assert_contents>
+                    <has_size size="7690"/>
                     <has_text text="SGB7017__MKDPKOFL_00679"/>
                     <has_text text="SampleID"/>
                     <has_text text="Metaphlan_Analysis"/>
@@ -1417,7 +1344,7 @@
 
-Usually, MetaPhlAn is first ran with default parameters for the type of analysis to profile the 
+Usually, MetaPhlAn is first run with default parameters for the type of analysis to profile the 
 species present in the community, and then a strain-level profiling can be performed to zoom-in on 
-specific species of interest. This operation can be performed quickly as it exploits the bowtie2out 
+specific species of interest. This operation can be performed quickly as it exploits the mapout 
 intermediate file saved during the execution of the default analysis type.
 
 Inputs
@@ -1436,7 +1363,7 @@
 
 The main output is a tab-separated file with the predicted taxon relative abundances.
 
-It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs.
+It also generates some intermediate files (SAM and mapout) if sequence files are given as inputs.
 
 
 More help and use cases