diff fasta-stats.xml @ 4:0dbb995c7d35 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
author iuc
date Thu, 18 Nov 2021 20:56:57 +0000
parents 56022eb50bbd
children
line wrap: on
line diff
--- a/fasta-stats.xml	Mon Jul 05 13:36:26 2021 +0000
+++ b/fasta-stats.xml	Thu Nov 18 20:56:57 2021 +0000
@@ -1,66 +1,91 @@
-<tool id="fasta-stats" name="Fasta Statistics" version="1.0.3">
-    <description>Display summary statistics for a fasta file.</description>
+<tool id="fasta-stats" name="Fasta Statistics" version="2.0" profile="20.05">
+    <description>display summary statistics for a FASTA file</description>
     <requirements>
-        <requirement type="package" version="5.26">perl</requirement>
+        <requirement type="package" version="1.21.4">numpy</requirement>
+        <requirement type="package" version="1.79">biopython</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-        perl '${__tool_directory__}/fasta-stats.pl'
-        '$dataset'
-        #if $genome_size:
-            $genome_size
+        python '${__tool_directory__}/fasta-stats.py'
+        --fasta '$fasta'
+        --stats_output '$stats_output'
+        #if $gaps_option
+            --gaps_output '$gaps_output'
         #end if
-        > '$stats'
+        #if $genome_size
+            --genome_size $genome_size
+        #end if
         ]]>
     </command>
     <inputs>
-        <param name="dataset" type="data" format="fasta" label="fasta or multifasta file" help="fasta dataset to get statistics for."/>
-        <param name="genome_size" type="float" optional="True" label="Genome size estimate (optional)" help="Estimate of the genome size in bases. If specified, NG50 and LG50 will be calculated."/>
+        <param argument="--fasta" type="data" format="fasta" label="FASTA or Multi-FASTA file" help="FASTA dataset to get statistics."/>
+        <param argument="--genome_size" type="integer" min="0" optional="true" label="Estimated genome size" help="This parameter is optional. If provided, it will be used for calculating the NG50 statistic." />
+        <param argument="--gaps_option" type="boolean" truevalue="true" falsevalue="false" label="Generate gap stats" help="If enabled, an additional BED file describing the gaps is generated."/>
     </inputs>
     <outputs>
-        <data name="stats" format="tabular" label="${tool.name} on ${on_string}: Fasta summary stats"/>
+        <data name="stats_output" format="tabular" label="${tool.name} on ${on_string}: summary stats"/>
+        <data name="gaps_output" format="bed" label="${tool.name} on ${on_string}: Gap stats">
+            <filter>gaps_option</filter>
+        </data>
     </outputs>
     <tests>
-        <test>
-            <param name="dataset" value="test.fasta"/>
-            <output name="stats" file="test_out.txt"/>
+        <test expect_num_outputs="1">
+            <param name="fasta" value="test.fasta" ftype="fasta"/>
+            <output name="stats_output" file="test_01.tab" ftype="tabular"/>
         </test>
-        <test>
-            <param name="dataset" value="ng50_input.fasta"/>
+        <!--Test gap options and NG50-->
+        <test expect_num_outputs="2">
+            <param name="fasta" value="ng50_input.fasta" ftype="fasta"/>
+            <param name="gaps_option" value="true"/>
             <param name="genome_size" value="4000"/>
-            <output name="stats" file="ng50_out.txt"/>
+            <output name="stats_output" file="test_02.tab" ftype="tabular"/>
+            <output name="gaps_output" file="test_02.bed" ftype="bed"/>
+        </test>
+        <!-- Compare outputs with QUAST: only the presence of the expected values is asserted -->
+        <test expect_num_outputs="1">
+            <param name="fasta" value="test_long_sequence.fasta" ftype="fasta"/>
+            <output name="stats_output" ftype="tabular">
+                <assert_contents>
+                    <has_text text="8353"/>
+                    <has_text text="303889"/>
+                    <has_text text="22107"/>
+                </assert_contents>
+            </output>
+        </test>
     </tests>
-    <help>
-**Fasta Stats**
-Displays the summary statistics for a fasta file.
+    <help><![CDATA[
+
+ .. class:: infomark
+
+**Purpose**
+
+Displays the summary statistics for a FASTA file.
 
 ------
 
-Outputs in tabular form:
-    Lengths: n50, min, max, median and average
+ .. class:: infomark
+
+**Outputs**
 
-    Number of base pairs: A, C, G, T, N, Total and Total_not_N
+This tool generates two outputs: a general summary and an optional gap stats file.
 
-    Number of sequences
+The general summary includes the following information:
 
-    GC content in %
-
-    If an optional genome size estimate is specified, then the NG50 length will also be calculated. 
+- Lengths: n50, min, max, median and average
+- Number of base pairs: A, C, G, T, N, Total and Total_not_N
+- Number of sequences
+- GC content
 
-------
-
-Inputs:
-
-Fasta dataset
+In addition, the optional gap stats BED file reports the location of the gaps.
+    ]]>
     </help>
     <citations>
         <citation type="bibtex">
-@UNPUBLISHED{Seemann_Gladman2012,
-    author = {Torsten Seemann and Simon Gladman},
-    title = {Fasta Statistics: Display summary statistics for a fasta file.},
-    year = {2012},
-    url = {https://github.com/galaxyproject/tools-iuc},
-}
+            @UNPUBLISHED{Anmol_Kyran2021,
+                author = {Anmol Kyran},
+                title = {Fasta Statistics: Display summary statistics for a fasta file.},
+                year = {2021},
+                url = {https://github.com/galaxyproject/tools-iuc},
+            }
         </citation>
     </citations>
 </tool>