Mercurial > repos > iuc > metaphlan
diff metaphlan.xml @ 16:56ff60ec416d draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/metaphlan commit 26cbf16569d13b7ddbbbd99fc49ec15ce312a992
| author | iuc |
|---|---|
| date | Fri, 14 Nov 2025 13:47:10 +0000 |
| parents | eca2e2e20436 |
| children |
line wrap: on
line diff
--- a/metaphlan.xml Fri Feb 21 21:30:21 2025 +0000 +++ b/metaphlan.xml Fri Nov 14 13:47:10 2025 +0000 @@ -1,4 +1,4 @@ -<tool id="metaphlan" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy4" profile="@PROFILE@"> +<tool id="metaphlan" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> <description>to profile the composition of microbial communities</description> <macros> <import>macros.xml</import> @@ -148,41 +148,31 @@ #end if #end if #if $inputs.db.db_selector == "cached" - --bowtie2db '$inputs.db.cached_db.fields.path' + --db_dir '$inputs.db.cached_db.fields.path' --index '$inputs.db.cached_db.fields.dbkey' + #if $inputs.db.viral_analysis.profile_vsc + $inputs.db.viral_analysis.profile_vsc + --vsc_out '$vcs_breath_coverage' + --vsc_breadth $inputs.db.viral_analysis.vsc_breadth + #end if #else - --bowtie2db 'ref_db/' + --db_dir 'ref_db/' --index 'custom_db' #end if -t '$analysis.analysis_type.t' #if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats" --tax_lev '$analysis.analysis_type.tax_lev.tax_lev' -#else if $analysis.analysis_type.t == "clade_specific_strain_tracker" - --clade '$analysis.analysis_type.clade' - #if str($analysis.analysis_type.min_ab) != '' - --min_ab $analysis.analysis_type.min_ab - #end if #else if $analysis.analysis_type.t == "marker_ab_table" and str($analysis.analysis_type.nreads) != '' --nreads $$analysis.analysis_type.nreads #else if $analysis.analysis_type.t == "marker_pres_table" and str($analysis.analysis_type.pres_th) != '' --pres_th $analysis.analysis_type.pres_th #end if - --min_cu_len $analysis.min_cu_len #if str($analysis.min_alignment_len) != '' --min_alignment_len $analysis.min_alignment_len #end if -#if 'add_viruses' in $analysis.organism_profiling - --add_viruses -#end if -#if 'ignore_eukaryotes' in $analysis.organism_profiling - --ignore_eukaryotes -#end if -#if 'ignore_bacteria' in $analysis.organism_profiling - --ignore_bacteria -#end if -#if 'ignore_archaea' in $analysis.organism_profiling - --ignore_archaea -#end if + +#echo " ".join(["--" + o for o in $analysis.organism_profiling]) + --stat $analysis.stat --stat_q $analysis.stat_q --perc_nonzero $analysis.perc_nonzero @@ -193,19 +183,17 @@ --sample_id_key '$out.sample_id_key' --sample_id '$out.sample_id' $out.use_group_representative - $out.legacy_output $out.CAMI_format_output - $out.unclassified_estimation - -o '$output_file' - --bowtie2out 'bowtie2out' + $out.skip_unclassified_estimation + #if $out.biom_format_output + $out.biom_format_output + -o '$biom_output_file' + #else + -o '$output_file' + #end if + --mapout 'mapout' -s '$sam_output_file' - --biom '$biom_output_file' --nproc \${GALAXY_SLOTS:-4} -#if $viral_analysis.profile_vsc - $viral_analysis.profile_vsc - --vsc_out '$vcs_breath_coverage' - --vsc_breadth $viral_analysis.vsc_breadth -#end if #if $subsample.selector != "no" #if $subsample.selector == "single" @@ -226,7 +214,7 @@ #if $inputs.in.selector == "raw" && -mv 'bowtie2out' '$bowtie2out' +mv 'mapout' '$mapout' #end if @@ -239,7 +227,6 @@ split_levels --metaphlan_output '$output_file' --outdir 'split_levels' - $out.legacy_output #end if #end if @@ -257,7 +244,7 @@ <param name="selector" type="select" label="Input(s)"> <option value="raw" selected="true">Fasta/FastQ file(s) with microbiota reads</option> <option value="sam">Externally BowTie2-mapped SAM file</option> - <option value="bowtie2out">Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run</option> + <option value="mapout">Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run</option> </param> <when value="raw"> <conditional name="raw_in"> @@ -295,7 +282,7 @@ <when value="sam"> <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map microbiota reads"/> </when> - <when value="bowtie2out"> + <when value="mapout"> <param name="in" type="data" format="tabular" label="Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run" help="File needs to be generated with MetaPhlAn versions >3.0"/> </when> </conditional> @@ -311,9 +298,19 @@ <validator message="No compatible MetaPhlAn database is available" type="no_options"/> </options> </param> + <conditional name="viral_analysis"> + <param argument="--profile_vsc" type="select" label="Profile Viruses with VSCs approach"> + <option value="--profile_vsc">Yes (requires FASTQ input and reference data with VSG fasta)</option> + <option value="" selected="true">No</option> + </param> + <when value="--profile_vsc"> + <param argument="--vsc_breadth" type="float" min="0" max="1" value="0.75" label="Minimum Breadth of Coverage" help="Minimum coverage (fraction) for a Viral Group to be reported."/> + </when> + <when value=""/> + </conditional> </when> <when value="history"> - <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/> + <param argument="--db_dir" name="bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/> <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated to the database with clade-specific marker genes from history"/> </when> </conditional> @@ -323,11 +320,8 @@ <param argument="-t" type="select" label="Type of analysis to perform"> <option value="rel_ab" selected="true">rel_ab: Profiling a microbiota in terms of relative abundances</option> <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a microbiota in terms of relative abundances and estimate the number of reads comming from each clade</option> - <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option> <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option> - <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option> <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when > 0.0 and normalized by microbiota size if number of reads is specified)</option> - <option value="marker_counts">marker_counts: Non-normalized marker counts (use with extreme caution)</option> <option value="marker_pres_table">marker_pres_table: List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th</option> </param> <when value="rel_ab"> @@ -336,27 +330,21 @@ <when value="rel_ab_w_read_stats"> <expand macro="tax_lev"/> </when> - <when value="reads_map"/> <when value="clade_profiles"/> - <when value="clade_specific_strain_tracker"> - <param argument="--clade" type="text" value="" label="Clade for which to extract list of markers present" help="Markers are also extracted for subclades"/> - <param argument="--min_ab" type="float" optional="true" label="The minimum percentage abundance for the clade"/> - </when> <when value="marker_ab_table"> <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original microbiota" help="It is used for normalizing the length-normalized counts with the microbiota size as well. No normalization applied if the value is not specified"/> </when> - <when value="marker_counts"/> <when value="marker_pres_table"> <param argument="--pres_th" type="integer" optional="true" label="Threshold for calling a marker present"/> </when> </conditional> - <param argument="--min_cu_len" type="integer" value="2000" label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/> <param argument="--min_alignment_len" type="integer" optional="true" label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/> <param name="organism_profiling" type="select" optional="true" label="Organisms to profile" multiple="true"> - <option value="add_viruses" selected="true">Profile viral organisms (add_viruses)</option> <option value="ignore_eukaryotes">Ignore eukaryotic organisms (ignore_eukaryotes)</option> <option value="ignore_bacteria">Ignore bacteria organisms (ignore_bacteria)</option> <option value="ignore_archaea">Ignore archea organisms (ignore_archaea)</option> + <option value="ignore_ksgbs">Do not profile known SGBs</option> + <option value="ignore_usgbs">Do not profile unknown SGBs</option> </param> <param argument="--stat" type="select" label="Statistical approach for converting marker abundances into clade abundances"> <option value="avg_g">avg_g: Clade global (i.e. normalizing all markers together) average (avg_g)</option> @@ -388,37 +376,31 @@ <expand macro="subsample_common"/> </when> </conditional> - <conditional name="viral_analysis"> - <param argument="--profile_vsc" type="select" label="Profile Viruses with VSCs approach"> - <option value="--profile_vsc">Yes (requires FASTQ input and reference data with VSG fasta)</option> - <option value="" selected="true">No</option> - </param> - <when value="--profile_vsc"> - <param argument="--vsc_breadth" type="float" min="0" max="1" value="0.75" label="Minimum Breadth of Coverage" help="Minimum coverage (fraction) for a Viral Group to be reported."/> - </when> - <when value=""/> - </conditional> <section name="out" title="Outputs" expanded="true"> <param argument="--sample_id_key" type="text" value="SampleID" label="Sample ID key for this analysis"/> <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/> <param argument="--use_group_representative" type="boolean" truevalue="--use_group_representative" falsevalue="" checked="false" label="Use a species as representative for species groups?"/> - <param argument="--legacy-output" type="boolean" truevalue="--legacy-output" falsevalue="" checked="false" label="Old MetaPhlAn2 two columns output?"/> <param argument="--CAMI_format_output" type="boolean" truevalue="--CAMI_format_output" falsevalue="" checked="false" label="Report the profiling using the CAMI output format?"/> - <param argument="--unclassified_estimation" type="boolean" truevalue="--unclassified_estimation" falsevalue="" checked="false" label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/> + <param argument="--skip_unclassified_estimation" type="boolean" truevalue="--skip_unclassified_estimation" falsevalue="" checked="false" label="Do not scale relative abundances to the estimate unclassified taxa"/> + <param argument="--biom_format_output" type="boolean" truevalue="--biom_format_output" falsevalue="" checked="false" label="Report the profiling using the biom output format"/> <param name="krona_output" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output for Krona?"/> </section> <!-- enabling this in tests will allow metaphlan to download reference data (we do this only with the smallish TOY DB) --> <param name="test" type="hidden" value="false"/> </inputs> <outputs> - <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances"/> - <data name="bowtie2out" format="tabular" label="${tool.name} on ${on_string}: Bowtie2 output"> + <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Predicted taxon relative abundances"> + <filter>not out['biom_format_output']</filter> + </data> + <data name="mapout" format="tabular" label="${tool.name} on ${on_string}: Bowtie2 output"> <filter>inputs['in']['selector'] == "raw"</filter> </data> <data name="sam_output_file" format="sam" label="${tool.name} on ${on_string}: SAM file"> <filter>inputs['in']['selector'] == "raw"</filter> </data> - <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file"/> + <data name="biom_output_file" format="biom1" label="${tool.name} on ${on_string}: BIOM file"> + <filter>out['biom_format_output']</filter> + </data> <collection name="levels" type="list" label="${tool.name} on ${on_string}: Predicted taxon relative abundances at each taxonomic levels"> <discover_datasets pattern="(?P<designation>.+)" directory="split_levels/" format="tabular"/> <filter>analysis['analysis_type']['t'] in ['rel_ab', 'rel_ab_w_read_stats'] and analysis['analysis_type']['tax_lev']['tax_lev'] == "a" and analysis['analysis_type']['tax_lev']['split_levels']</filter> @@ -427,7 +409,7 @@ <filter>out['krona_output']</filter> </data> <data name="vcs_breath_coverage" format="tabular" label="${tool.name} on ${on_string}: VSCs breadth-of-coverage"> - <filter>viral_analysis['profile_vsc']</filter> + <filter>inputs['db']['db_selector'] == "cached" and inputs['db']['viral_analysis']['profile_vsc'] != ""</filter> </data> <data name="subsample_single" format="fastqsanger" from_work_dir="subsampled.out" label="${tool.name} on ${on_string}: Subsampled reads"> <filter>subsample['selector'] == 'single'</filter> @@ -440,7 +422,7 @@ </outputs> <tests> <!-- Single GZ file, Cached db --> - <test expect_num_outputs="6"> + <test expect_num_outputs="5"> <section name="inputs"> <conditional name="in"> <param name="selector" value="raw"/> @@ -467,8 +449,6 @@ <param name="split_levels" value="true"/> </conditional> </conditional> - <param name="min_cu_len" value="2000"/> - <param name="organism_profiling" value="add_viruses"/> <param name="stat" value="avg_g"/> <param name="stat_q" value="0.2"/> <param name="perc_nonzero" value="0.33"/> @@ -478,9 +458,8 @@ <param name="sample_id_key" value="SampleID"/> <param name="sample_id" value="Metaphlan_Analysis"/> <param name="use_group_representative" value="false"/> - <param name="legacy_output" value="false"/> <param name="CAMI_format_output" value="false"/> - <param name="unclassified_estimation" value="false"/> + <param name="skip_unclassified_estimation" value="false"/> <param name="krona_output" value="true"/> </section> <output name="output_file" ftype="tabular"> @@ -488,7 +467,7 @@ <has_text text="UNCLASSIFIED"/> </assert_contents> </output> - <output name="bowtie2out" ftype="tabular"> + <output name="mapout" ftype="tabular"> <assert_contents> <not_has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/> <has_n_lines n="2"/> @@ -499,12 +478,6 @@ <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> </assert_contents> </output> - <output name="biom_output_file" ftype="biom1"> - <assert_contents> - <not_has_text text="k__Bacteria"/> - <not_has_text text="p__Actinobacteria"/> - </assert_contents> - </output> <output_collection name="levels" type="list"> <element name="all" ftype="tabular"> <assert_contents> @@ -589,7 +562,7 @@ </assert_stderr> </test> <!-- Single GZ file, Cached db --> - <test expect_num_outputs="6"> + <test expect_num_outputs="5"> <section name="inputs"> <conditional name="in"> <param name="selector" value="raw"/> @@ -616,8 +589,6 @@ <param name="split_levels" value="true"/> </conditional> </conditional> - <param name="min_cu_len" value="2000"/> - <param name="organism_profiling" value="add_viruses"/> <param name="stat" value="avg_g"/> <param name="stat_q" value="0.2"/> <param name="perc_nonzero" value="0.33"/> @@ -627,32 +598,29 @@ <param name="sample_id_key" value="SampleID"/> <param name="sample_id" value="Metaphlan_Analysis"/> <param name="use_group_representative" value="false"/> - <param name="legacy_output" value="false"/> <param name="CAMI_format_output" value="false"/> - <param name="unclassified_estimation" value="false"/> + <param name="skip_unclassified_estimation" value="false"/> <param name="krona_output" value="true"/> </section> - <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> + <output name="output_file" ftype="tabular"> <assert_contents> + <has_size size="12357"/> <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> </assert_contents> </output> - <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size"> + <output name="mapout" ftype="tabular"> <assert_contents> + <has_size size="10504"/> <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/> <has_text text="37637__U2I1U8__N579_01580"/> </assert_contents> </output> - <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size"> + <output name="sam_output_file" ftype="sam"> <assert_contents> + <has_size size="54554"/> <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> </assert_contents> </output> - <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> - <assert_contents> - <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> - </assert_contents> - </output> <output_collection name="levels" type="list"> <element name="all" ftype="tabular"> <assert_contents> @@ -736,7 +704,7 @@ </assert_stderr> </test> <!-- Multiple GZ file, Local db--> - <test expect_num_outputs="4"> + <test expect_num_outputs="3"> <section name="inputs"> <conditional name="in"> <param name="selector" value="raw"/> @@ -764,8 +732,6 @@ <param name="split_levels" value="false"/> </conditional> </conditional> - <param name="min_cu_len" value="2000"/> - <param name="organism_profiling" value="add_viruses"/> <param name="stat" value="avg_g"/> <param name="stat_q" value="0.2"/> <param name="perc_nonzero" value="0.33"/> @@ -775,41 +741,38 @@ <param name="sample_id_key" value="SampleID"/> <param name="sample_id" value="Metaphlan_Analysis"/> <param name="use_group_representative" value="false"/> - <param name="legacy_output" value="false"/> <param name="CAMI_format_output" value="false"/> - <param name="unclassified_estimation" value="false"/> + <param name="skip_unclassified_estimation" value="false"/> <param name="krona_output" value="false"/> </section> - <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> + <output name="output_file" ftype="tabular"> <assert_contents> + <has_size size="12298"/> <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> <has_text text="relative_abundance"/> <has_text text="NCBI_tax_id"/> <has_text text="clade_name"/> </assert_contents> </output> - <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size"> + <output name="mapout" ftype="tabular"> <assert_contents> + <has_size size="20961"/> <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/> <has_text text="37637__U2I1U8__N579_01580"/> </assert_contents> </output> - <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares-two-inputs.sam" compare="sim_size"> + <output name="sam_output_file" ftype="sam"> <assert_contents> + <has_size size="104838"/> <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> </assert_contents> </output> - <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> - <assert_contents> - <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> - </assert_contents> - </output> <assert_stderr> <has_text text="Downloading" negate="true"/> </assert_stderr> </test> <!-- Paired GZ file, Cached db (note sumsample_paired and the included forward and reverse reads are counted separatelym because they are all statically defined) --> - <test expect_num_outputs="7"> + <test expect_num_outputs="6"> <section name="inputs"> <conditional name="in"> <param name="selector" value="raw"/> @@ -837,8 +800,6 @@ <param name="split_levels" value="false"/> </conditional> </conditional> - <param name="min_cu_len" value="2000"/> - <param name="organism_profiling" value="add_viruses"/> <param name="stat" value="avg_g"/> <param name="stat_q" value="0.2"/> <param name="perc_nonzero" value="0.33"/> @@ -853,21 +814,22 @@ <param name="sample_id_key" value="SampleID"/> <param name="sample_id" value="Metaphlan_Analysis"/> <param name="use_group_representative" value="false"/> - <param name="legacy_output" value="false"/> <param name="CAMI_format_output" value="false"/> - <param name="unclassified_estimation" value="false"/> + <param name="skip_unclassified_estimation" value="false"/> <param name="krona_output" value="false"/> </section> - <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> + <output name="output_file" ftype="tabular"> <assert_contents> + <has_size size="9808"/> <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> <has_text text="relative_abundance"/> <has_text text="NCBI_tax_id"/> <has_text text="clade_name"/> </assert_contents> </output> - <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size"> + <output name="mapout" ftype="tabular"> <assert_contents> + <has_size size="52535"/> <has_text text="HWI-EAS109_102883399:3:107:9938:7093/1"/> <has_text text="90240__A0A378QWM4__NCTC12877_00123"/> </assert_contents> @@ -878,11 +840,6 @@ <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> </assert_contents> </output> - <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> - <assert_contents> - <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> - </assert_contents> - </output> <output_collection name="subsample_paired" type="paired"> <element name="forward"> <assert_contents> @@ -900,7 +857,7 @@ </assert_stderr> </test> <!-- Paired GZ file as collection, Cached db (note sumsample_paired and the included forward and reverse reads are counted separatelym because they are all statically defined) --> - <test expect_num_outputs="7"> + <test expect_num_outputs="6"> <section name="inputs"> <conditional name="in"> <param name="selector" value="raw"/> @@ -932,8 +889,6 @@ <param name="split_levels" value="false"/> </conditional> </conditional> - <param name="min_cu_len" value="2000"/> - <param name="organism_profiling" value="add_viruses"/> <param name="stat" value="avg_g"/> <param name="stat_q" value="0.2"/> <param name="perc_nonzero" value="0.33"/> @@ -948,21 +903,22 @@ <param name="sample_id_key" value="SampleID"/> <param name="sample_id" value="Metaphlan_Analysis"/> <param name="use_group_representative" value="false"/> - <param name="legacy_output" value="false"/> <param name="CAMI_format_output" value="false"/> - <param name="unclassified_estimation" value="false"/> + <param name="skip_unclassified_estimation" value="false"/> <param name="krona_output" value="false"/> </section> - <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> + <output name="output_file" ftype="tabular"> <assert_contents> + <has_size size="9808"/> <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> <has_text text="relative_abundance"/> <has_text text="NCBI_tax_id"/> <has_text text="clade_name"/> </assert_contents> </output> - <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size"> + <output name="mapout" ftype="tabular"> <assert_contents> + <has_size size="52535"/> <has_text text="HWI-EAS109_102883399:3:107:9938:7093/1"/> <has_text text="90240__A0A378QWM4__NCTC12877_00123"/> </assert_contents> @@ -973,11 +929,6 @@ <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> </assert_contents> </output> - <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> - <assert_contents> - <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> - </assert_contents> - </output> <output_collection name="subsample_paired" type="paired"> <element name="forward"> <assert_contents> @@ -995,7 +946,7 @@ </assert_stderr> </test> <!-- Paired fastq file as collection, Cached db --> - <test expect_num_outputs="4"> + <test expect_num_outputs="3"> <section name="inputs"> <conditional name="in"> <param name="selector" value="raw"/> @@ -1027,8 +978,6 @@ <param name="split_levels" value="false"/> </conditional> </conditional> - <param name="min_cu_len" value="2000"/> - <param name="organism_profiling" value="add_viruses"/> <param name="stat" value="avg_g"/> <param name="stat_q" value="0.2"/> <param name="perc_nonzero" value="0.33"/> @@ -1041,21 +990,22 @@ <param name="sample_id_key" value="SampleID"/> <param name="sample_id" value="Metaphlan_Analysis"/> <param name="use_group_representative" value="false"/> - <param name="legacy_output" value="false"/> <param name="CAMI_format_output" value="false"/> - <param name="unclassified_estimation" value="false"/> + <param name="skip_unclassified_estimation" value="false"/> <param name="krona_output" value="false"/> </section> - <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> + <output name="output_file" ftype="tabular"> <assert_contents> - <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> - <has_text text="relative_abundance"/> - <has_text text="NCBI_tax_id"/> - <has_text text="clade_name"/> - </assert_contents> - </output> - <output name="bowtie2out" ftype="tabular"> + <has_size size="12356"/> + <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> + <has_text text="relative_abundance"/> + <has_text text="NCBI_tax_id"/> + <has_text text="clade_name"/> + </assert_contents> + </output> + <output name="mapout" ftype="tabular"> <assert_contents> + <has_size size="20961"/> <has_text text="HWI-EAS109_102883399:3:107:9938:7093/1"/> <has_text text="90240__A0A378QWM4__NCTC12877_00123"/> </assert_contents> @@ -1066,17 +1016,12 @@ <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> </assert_contents> </output> - <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> - <assert_contents> - <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> - </assert_contents> - </output> <assert_stderr> <has_text text="Downloading" negate="true"/> </assert_stderr> </test> <!-- SAM, cached DB --> - <test expect_num_outputs="2"> + <test expect_num_outputs="1"> <section name="inputs"> <conditional name="in"> <param name="selector" value="sam"/> @@ -1095,8 +1040,6 @@ <param name="split_levels" value="false"/> </conditional> </conditional> - <param name="min_cu_len" value="2000"/> - <param name="organism_profiling" value="add_viruses"/> <param name="stat" value="avg_g"/> <param name="stat_q" value="0.2"/> <param name="perc_nonzero" value="0.33"/> @@ -1106,34 +1049,29 @@ <param name="sample_id_key" value="SampleID"/> <param name="sample_id" value="Metaphlan_Analysis"/> <param name="use_group_representative" value="false"/> - <param name="legacy_output" value="false"/> <param name="CAMI_format_output" value="false"/> - <param name="unclassified_estimation" value="false"/> + <param name="skip_unclassified_estimation" value="false"/> <param name="krona_output" value="false"/> </section> - <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> + <output name="output_file" ftype="tabular"> <assert_contents> + <has_size size="12281"/> <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> <has_text text="relative_abundance"/> <has_text text="NCBI_tax_id"/> <has_text text="clade_name"/> </assert_contents> </output> - <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> - <assert_contents> - <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> - </assert_contents> - </output> <assert_stderr> <has_text text="Downloading" negate="true"/> </assert_stderr> </test> - <!-- bowtie2out, cached DB --> - <test expect_num_outputs="2"> + <!-- mapout, cached DB --> + <test expect_num_outputs="1"> <section name="inputs"> <conditional name="in"> - <param name="selector" value="bowtie2out"/> - <param name="in" value="SRS014464-Anterior_nares-bowtie2out.tabular"/> + <param name="selector" value="mapout"/> + <param name="in" value="SRS014464-Anterior_nares-mapout.tabular"/> </conditional> <conditional name="db"> <param name="db_selector" value="cached"/> @@ -1148,8 +1086,6 @@ <param name="split_levels" value="false"/> </conditional> </conditional> - <param name="min_cu_len" value="2000"/> - <param name="organism_profiling" value="add_viruses"/> <param name="stat" value="avg_g"/> <param name="stat_q" value="0.2"/> <param name="perc_nonzero" value="0.33"/> @@ -1159,30 +1095,25 @@ <param name="sample_id_key" value="SampleID"/> <param name="sample_id" value="Metaphlan_Analysis"/> <param name="use_group_representative" value="false"/> - <param name="legacy_output" value="false"/> <param name="CAMI_format_output" value="false"/> - <param name="unclassified_estimation" value="false"/> + <param name="skip_unclassified_estimation" value="false"/> <param name="krona_output" value="false"/> </section> - <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size"> + <output name="output_file" ftype="tabular"> <assert_contents> + <has_size size="12273"/> <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> <has_text text="relative_abundance"/> <has_text text="NCBI_tax_id"/> <has_text text="clade_name"/> </assert_contents> </output> - <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> - <assert_contents> - <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> - </assert_contents> - </output> <assert_stderr> <has_text text="Downloading" negate="true"/> </assert_stderr> </test> <!-- Single FASTA file, Cached db --> - <test expect_num_outputs="6"> + <test expect_num_outputs="5"> <section name="inputs"> <conditional name="in"> <param name="selector" value="raw"/> @@ -1209,8 +1140,6 @@ <param name="split_levels" value="true"/> </conditional> </conditional> - <param name="min_cu_len" value="2000"/> - <param name="organism_profiling" value="add_viruses"/> <param name="stat" value="avg_g"/> <param name="stat_q" value="0.2"/> <param name="perc_nonzero" value="0.33"/> @@ -1221,95 +1150,92 @@ <param name="sample_id_key" value="SampleID"/> <param name="sample_id" value="Metaphlan_Analysis"/> <param name="use_group_representative" value="false"/> - <param name="legacy_output" value="true"/> <param name="CAMI_format_output" value="false"/> - <param name="unclassified_estimation" value="false"/> + <param name="skip_unclassified_estimation" value="false"/> <param name="krona_output" value="true"/> </section> - <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size"> + <output name="output_file" ftype="tabular"> <assert_contents> + <has_size size="11869"/> <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/> <has_text text="SampleID"/> <has_text text="Metaphlan_Analysis"/> </assert_contents> </output> - <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size"> + <output name="mapout" ftype="tabular"> <assert_contents> + <has_size size="10504"/> <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/> <has_text text="37637__U2I1U8__N579_01580"/> </assert_contents> </output> - <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size"> + <output name="sam_output_file" ftype="sam"> <assert_contents> + <has_size size="54554"/> <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/> </assert_contents> </output> - <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size"> - <assert_contents> - <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/> - </assert_contents> - </output> <output_collection name="levels" type="list"> <element name="all" ftype="tabular"> <assert_contents> <has_text text="Gammaproteobacteria"/> <has_text text="Corynebacterium accolens"/> - <has_n_columns n="9"/> + <has_n_columns n="17"/> </assert_contents> </element> <element name="class" ftype="tabular"> <assert_contents> <has_text text="class"/> <has_text text="Actinobacteria"/> - <has_n_columns n="2"/> + <has_n_columns n="3"/> </assert_contents> </element> <element name="family" ftype="tabular"> <assert_contents> <has_text text="family"/> <has_text text="Propionibacteriaceae"/> - <has_n_columns n="2"/> + <has_n_columns n="3"/> </assert_contents> </element> <element name="genus" ftype="tabular"> <assert_contents> <has_text text="genus"/> <has_text text="Cutibacterium"/> - <has_n_columns n="2"/> + <has_n_columns n="3"/> </assert_contents> </element> <element name="kingdom" ftype="tabular"> <assert_contents> <has_text text="kingdom"/> <has_text text="Bacteria"/> - <has_n_columns n="2"/> + <has_n_columns n="3"/> </assert_contents> </element> <element name="order" ftype="tabular"> <assert_contents> <has_text text="order"/> <has_text text="Propionibacteriales"/> - <has_n_columns n="2"/> + <has_n_columns n="3"/> </assert_contents> </element> <element name="phylum" ftype="tabular"> <assert_contents> <has_text text="phylum"/> <has_text text="Firmicutes"/> - <has_n_columns n="2"/> + <has_n_columns n="3"/> </assert_contents> </element> <element name="species" ftype="tabular"> <assert_contents> <has_text text="species"/> <has_text text="Corynebacterium accolens"/> - <has_n_columns n="2"/> + <has_n_columns n="3"/> </assert_contents> </element> <element name="strains" ftype="tabular"> <assert_contents> <has_text text="strains"/> - <has_n_columns n="2"/> + <has_n_columns n="3"/> </assert_contents> </element> </output_collection> @@ -1326,7 +1252,7 @@ </test> <!-- Check a non-default analysis mode and viral analysis --> - <test expect_num_outputs="6"> + <test expect_num_outputs="5"> <section name="inputs"> <conditional name="in"> <param name="selector" value="raw"/> @@ -1338,6 +1264,9 @@ <conditional name="db"> <param name="db_selector" value="cached"/> <param name="cached_db" value="mpa_vJan21_TOY_CHOCOPhlAnSGB"/> + <conditional name="viral_analysis"> + <param name="profile_vsc" value="--profile_vsc"/> + </conditional> </conditional> </section> <section name="analysis"> @@ -1345,17 +1274,15 @@ <param name="t" value="marker_ab_table"/> </conditional> </section> - <conditional name="viral_analysis"> - <param name="profile_vsc" value="--profile_vsc"/> - </conditional> <conditional name="subsample"> <param name="selector" value="single"/> <param name="subsampling" value="10000"/> <param name="subsampling_seed" value="42"/> </conditional> <param name="test" value="true"/> - <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size"> + <output name="output_file" ftype="tabular"> <assert_contents> + <has_size size="7690"/> <has_text text="SGB7017__MKDPKOFL_00679"/> <has_text text="SampleID"/> <has_text text="Metaphlan_Analysis"/> @@ -1417,7 +1344,7 @@ Usually, MetaPhlAn is first ran with default parameters for the type of analysis to profile the species present in the community, and then a strain-level profiling can be performed to zoom-in on -specific species of interest. This operation can be performed quickly as it exploits the bowtie2out +specific species of interest. This operation can be performed quickly as it exploits the mapout intermediate file saved during the execution of the default analysis type. Inputs @@ -1436,7 +1363,7 @@ The main output is a tab-separated file with the predicted taxon relative abundances. -It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs. +It also generates a BIOM file and some intermediate files (SAM and mapout) if sequence files are given as inputs. More help and use cases
