Mercurial > repos > iuc > metaphlan
changeset 2:a92a632c4d9b draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit f1c6f4fe1e572ace84cf9106bc253603f55aac55"
author | iuc |
---|---|
date | Mon, 14 Jun 2021 12:48:10 +0000 |
parents | b89b0765695d |
children | ff8f55893e7d |
files | formatoutput.py macros.xml metaphlan.xml test-data/no_taxon_input.fasta |
diffstat | 4 files changed, 174 insertions(+), 23 deletions(-) [+] |
line wrap: on
line diff
--- a/formatoutput.py Mon May 17 20:10:24 2021 +0000 +++ b/formatoutput.py Mon Jun 14 12:48:10 2021 +0000 @@ -57,7 +57,9 @@ # skip headers if line.startswith("#"): continue - + # skip UNKNOWN lines in Predicted taxon relative abundances + if "UNKNOWN" in line: + continue # spit lines split_line = line[:-1].split('\t') taxo_n = split_line[0].split('|')
--- a/macros.xml Mon May 17 20:10:24 2021 +0000 +++ b/macros.xml Mon Jun 14 12:48:10 2021 +0000 @@ -1,6 +1,6 @@ <?xml version="1.0"?> <macros> - <token name="@TOOL_VERSION@">3.0.8</token> + <token name="@TOOL_VERSION@">3.0.9</token> <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">20.01</token> <xml name="edam_ontology"> @@ -21,7 +21,7 @@ </xml> <xml name="citations"> <citations> - <citation type="doi">1101/2020.11.19.388223</citation> + <citation type="doi">10.7554/eLife.65088</citation> </citations> </xml> </macros>
--- a/metaphlan.xml Mon May 17 20:10:24 2021 +0000 +++ b/metaphlan.xml Mon Jun 14 12:48:10 2021 +0000 @@ -195,8 +195,7 @@ --metaphlan_output '$output_file' --outdir 'split_levels' $out.legacy_output -&& -ls split_levels + #end if #if $out.krona_output @@ -211,26 +210,26 @@ <section name="inputs" title="Inputs" expanded="true"> <conditional name="in"> <param name="selector" type="select" label="Input(s)"> - <option value="raw" selected="true">Fasta/FastQ file(s) with metagenomic reads</option> + <option value="raw" selected="true">Fasta/FastQ file(s) with microbiota reads</option> <option value="sam">Externally BowTie2-mapped SAM file</option> - <option value="bowtie2out">Intermediary mapping file of the metagenome generated by a previous MetaPhlAn run</option> + <option value="bowtie2out">Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run</option> </param> <when value="raw"> <conditional name="raw_in"> - <param name="selector" type="select" label="Fasta/FastQ file(s) with metagenomic reads"> + <param name="selector" type="select" label="Fasta/FastQ file(s) with microbiota reads"> <option value="single" selected="true">One single-end file</option> <option value="multiple">Multiple single-end files</option> <option value="paired">Paired-end files</option> </param> <when value="single"> - <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ file with metagenomic reads"/> + <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ file with microbiota reads"/> </when> <when value="multiple"> - <param name="in" type="data" format="@FILE_FORMATS@" multiple="true" label="Single-end Fasta/FastQ files with metagenomic reads"/> + <param name="in" type="data" format="@FILE_FORMATS@" multiple="true" label="Single-end Fasta/FastQ files with microbiota reads"/> </when> <when value="paired"> - <param name="in_f" type="data" format="@FILE_FORMATS@" label="Forward paired-end Fasta/FastQ file with 
metagenomic reads"/> - <param name="in_r" type="data" format="@FILE_FORMATS@" label="Reverse paired-end Fasta/FastQ file with metagenomic reads"/> + <param name="in_f" type="data" format="@FILE_FORMATS@" label="Forward paired-end Fasta/FastQ file with microbiota reads"/> + <param name="in_r" type="data" format="@FILE_FORMATS@" label="Reverse paired-end Fasta/FastQ file with microbiota reads"/> </when> </conditional> <param argument="--read_min_len" type="integer" value="70" label="Minimum length of the reads to be considered when parsing the input file"/> @@ -245,10 +244,10 @@ </section> </when> <when value="sam"> - <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map metagenom reads"/> + <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map microbiota reads"/> </when> <when value="bowtie2out"> - <param name="in" type="data" format="tabular" label="Intermediary mapping file of the metagenome generated by a previous MetaPhlAn run" + <param name="in" type="data" format="tabular" label="Intermediary mapping file of the microbiota generated by a previous MetaPhlAn run" help="File needs to be generated with MetaPhlAn versions >3.0"/> </when> </conditional> @@ -273,12 +272,12 @@ <section name="analysis" title="Analysis" expanded="true"> <conditional name="analysis_type"> <param argument="-t" type="select" label="Type of analysis to perform"> - <option value="rel_ab" selected="true">rel_ab: Profiling a metagenomes in terms of relative abundances</option> - <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a metagenomes in terms of relative abundances and estimate the number of reads comming from each clade</option> + <option value="rel_ab" selected="true">rel_ab: Profiling a microbiota in terms of relative abundances</option> + <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a microbiota 
in terms of relative abundances and estimate the number of reads comming from each clade</option> <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option> <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option> <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option> - <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when > 0.0 and normalized by metagenome size if number of reads is specified)</option> + <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when > 0.0 and normalized by microbiota size if number of reads is specified)</option> <option value="marker_counts">marker_counts: Non-normalized marker counts (use with extreme caution)</option> <option value="marker_pres_table">marker_pres_table: List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th</option> </param> @@ -296,8 +295,8 @@ <param argument="--min_ab" type="float" optional="true" label="The minimum percentage abundance for the clade"/> </when> <when value="marker_ab_table"> - <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original metagenome" - help="It is used for normalizing the length-normalized counts with the metagenome size as well. No normalization applied if the value is not specified"/> + <param argument="--nreads" type="integer" optional="true" label="Total number of reads in the original microbiota" + help="It is used for normalizing the length-normalized counts with the microbiota size as well. 
No normalization applied if the value is not specified"/> </when> <when value="marker_counts"/> <when value="marker_pres_table"> @@ -369,6 +368,152 @@ <conditional name="raw_in"> <!-- Single GZ file --> <param name="selector" value="single"/> + <param name="in" value="no_taxon_input.fasta"/> + </conditional> + <param name="read_min_len" value="70"/> + <section name="mapping"> + <param name="bt2_ps" value="sensitive"/> + <param name="min_mapq_val" value="5"/> + </section> + </conditional> + <conditional name="db"> + <!-- Cached db --> + <param name="db_selector" value="cached"/> + <param name="cached_db" value="test-db-20210409"/> + </conditional> + </section> + <section name="analysis"> + <conditional name="analysis_type"> + <param name="t" value="rel_ab"/> + <conditional name="tax_lev"> + <param name="tax_lev" value="a"/> + <param name="split_levels" value="true"/> + </conditional> + </conditional> + <param name="min_cu_len" value="2000"/> + <param name="organism_profiling" value="add_viruses"/> + <param name="stat" value="avg_g"/> + <param name="stat_q" value="0.2"/> + <param name="perc_nonzero" value="0.33"/> + <param name="avoid_disqm" value="true"/> + </section> + <section name="out"> + <param name="sample_id_key" value="SampleID"/> + <param name="sample_id" value="Metaphlan_Analysis"/> + <param name="use_group_representative" value="false"/> + <param name="legacy_output" value="false"/> + <param name="CAMI_format_output" value="false"/> + <param name="unknown_estimation" value="false"/> + <param name="krona_output" value="true"/> + </section> + <output name="output_file" ftype="tabular"> + <assert_contents> + <has_text text="UNKNOWN"/> + </assert_contents> + </output> + <output name="bowtie2out" ftype="tabular"> + <assert_contents> + <not_has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/> + <has_n_lines n="2"/> + </assert_contents> + </output> + <output name="sam_output_file" ftype="sam"> + <assert_contents> + <has_text 
text="SN:13076__A0A2I1PE66__CYJ72_10760"/> + </assert_contents> + </output> + <output name="biom_output_file" ftype="biom1"> + <assert_contents> + <not_has_text text="k__Bacteria"/> + <not_has_text text="p__Actinobacteria"/> + </assert_contents> + </output> + <output_collection name="levels" type="list" > + <element name="all" ftype="tabular"> + <assert_contents> + <has_text text="class"/> + <has_n_columns n="17"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="kingdom" ftype="tabular"> + <assert_contents> + <has_text text="kingdom_id"/> + <has_n_columns n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="phylum" ftype="tabular"> + <assert_contents> + <has_text text="phylum_id"/> + <not_has_text text="kingdom_id"/> + <has_n_columns n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="class" ftype="tabular"> + <assert_contents> + <has_text text="class_id"/> + <not_has_text text="phylum_id"/> + <has_n_columns n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="order" ftype="tabular"> + <assert_contents> + <has_text text="order_id"/> + <not_has_text text="class_id"/> + <has_n_columns n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="family" ftype="tabular"> + <assert_contents> + <has_text text="family_id"/> + <not_has_text text="order"/> + <has_n_columns n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="genus" ftype="tabular"> + <assert_contents> + <has_text text="genus_id"/> + <not_has_text text="family"/> + <has_n_columns n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="species" ftype="tabular"> + <assert_contents> + <has_text text="species_id"/> + <not_has_text text="genus"/> + <has_n_columns n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + <element name="strains" ftype="tabular"> + <assert_contents> + <has_text text="strains_id"/> + 
<not_has_text text="species_id"/> + <has_n_columns n="3"/> + <has_n_lines n="1"/> + </assert_contents> + </element> + </output_collection> + <output name="krona_output_file" ftype="tabular"> + <assert_contents> + <not_has_text text="k__Bacteria"/> + <has_n_lines n="0"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="6"> + <section name="inputs"> + <conditional name="in"> + <param name="selector" value="raw"/> + <conditional name="raw_in"> + <!-- Single GZ file --> + <param name="selector" value="single"/> <param name="in" value="SRS014464-Anterior_nares.fasta.gz"/> </conditional> <param name="read_min_len" value="70"/> @@ -916,13 +1061,13 @@ - species-level resolution for bacteria, archaea, eukaryotes and viruses; - strain identification and tracking - orders of magnitude speedups compared to existing methods. -- metagenomic strain-level population genomics +- microbiota strain-level population genomics MetaPhlAn clade-abundance estimation ------------------------------------ The basic usage of MetaPhlAn consists in the identification of the clades (from phyla to species and -strains in particular cases) present in the metagenome obtained from a microbiome sample and their +strains in particular cases) present in the microbiota obtained from a microbiome sample and their relative abundance. Marker level analysis @@ -944,7 +1089,7 @@ - one or several sequence files in Fasta, FastQ (compressed or not) - a BowTie2 produced SAM file -- an intermediary mapping file of the metagenome generated by a previous MetaPhlAn run +- an intermediary mapping file of the microbiota generated by a previous MetaPhlAn run It also need the reference database, which can be locally installed or customized using the dedicated tools. @@ -965,4 +1110,4 @@ ]]></help> <expand macro="citations"/> -</tool> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/no_taxon_input.fasta Mon Jun 14 12:48:10 2021 +0000
@@ -0,0 +1,4 @@
+> seq1
+ATTAGGGATTTTAGGGGGGGAGATTTAGAGAGAGAGAGAGAGAAGAAGAGAAGAAGAAGAAGAAAAAGGGGGAAGAGAGA
+> seq2
+ATTAGGGATTTTAGGGGGGGAGATTTAGAGAGAGAGAGAGAGAAGAAGAGAAGAAGAAGAAGAAAAAGGGGGAAGAGAGA