view fasta-stats.xml @ 4:0dbb995c7d35 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
author iuc
date Thu, 18 Nov 2021 20:56:57 +0000
parents 56022eb50bbd
children
line wrap: on
line source

<tool id="fasta-stats" name="Fasta Statistics" version="2.0" profile="20.05">
    <description>display summary statistics for a FASTA file</description>
    <!-- Packages that must be available in the job environment for fasta-stats.py. -->
    <requirements>
        <requirement version="1.21.4" type="package">numpy</requirement>
        <requirement version="1.79" type="package">biopython</requirement>
    </requirements>
    <!--
        Command template (Cheetah). Runs the fasta-stats.py script located in
        the tool directory on the selected FASTA dataset and writes the summary
        to the stats_output dataset. The gaps output path is appended only when
        gaps_option is set, and the genome size only when genome_size evaluates
        as true in the template. Failures are detected from the exit code
        (detect_errors="exit_code").
    -->
    <command detect_errors="exit_code"><![CDATA[
        python '${__tool_directory__}/fasta-stats.py'
        --fasta '$fasta'
        --stats_output '$stats_output'
        #if $gaps_option
            --gaps_output '$gaps_output'
        #end if
        #if $genome_size
            --genome_size $genome_size
        #end if
        ]]>
    </command>
    <inputs>
        <!-- FASTA dataset handed to the script through its fasta option. -->
        <param argument="--fasta" type="data" format="fasta" label="FASTA or Multi-FASTA file" help="FASTA dataset to get statistics."/>
        <!--
            Optional integer forwarded through the genome_size option.
            NOTE(review): the command template tests this value for truthiness,
            so a value of 0 is presumably handled like an empty value; confirm.
        -->
        <param argument="--genome_size" type="integer" min="0" optional="true" label="Estimated genome size" help="This parameter is optional. If provided, it will be used for calculating the NG50 statistic." />
        <!--
            Wrapper-only switch: the command line has no matching gaps_option
            flag, it only decides whether the gaps_output path is passed and
            whether the gaps_output dataset is created. It is therefore declared
            with name= rather than argument= so the form does not advertise a
            command-line flag that does not exist. The parameter name is
            unchanged.
        -->
        <param name="gaps_option" type="boolean" truevalue="true" falsevalue="false" label="Generate gap stats" help="If enabled, an additional BED dataset with gap stats is produced."/>
    </inputs>
    <outputs>
        <!-- Summary table; always produced. -->
        <data format="tabular" name="stats_output" label="${tool.name} on ${on_string}: summary stats"/>
        <!-- BED dataset; kept only when the gaps_option input is true. -->
        <data format="bed" name="gaps_output" label="${tool.name} on ${on_string}: Gap stats">
            <filter>gaps_option</filter>
        </data>
    </outputs>
    <tests>
        <!-- Default options: only the summary table is expected -->
        <test expect_num_outputs="1">
            <param name="fasta" ftype="fasta" value="test.fasta"/>
            <output name="stats_output" ftype="tabular" file="test_01.tab"/>
        </test>
        <!-- Gap stats enabled together with a genome size (NG50) -->
        <test expect_num_outputs="2">
            <param name="fasta" ftype="fasta" value="ng50_input.fasta"/>
            <param name="gaps_option" value="true"/>
            <param name="genome_size" value="4000"/>
            <output name="stats_output" ftype="tabular" file="test_02.tab"/>
            <output name="gaps_output" ftype="bed" file="test_02.bed"/>
        </test>
        <!-- Long sequence: the summary must contain values compared with QUAST -->
        <test expect_num_outputs="1">
            <param name="fasta" ftype="fasta" value="test_long_sequence.fasta"/>
            <output name="stats_output" ftype="tabular">
                <assert_contents>
                    <has_text text="8353"/>
                    <has_text text="303889"/>
                    <has_text text="22107"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <!--
        Help text in reStructuredText. The class directives start in column 0
        on purpose: an indented directive line is parsed as an indented block
        (block quote) instead of a top-level directive.
    -->
    <help><![CDATA[

.. class:: infomark

**Purpose**

Displays the summary statistics for a FASTA file.

------

.. class:: infomark

**Outputs**

This tool generates two outputs: a general summary and an optional gap stats file.

The general summary includes the following information:

- Lengths: n50, min, max, median and average
- NG50 (only when an estimated genome size is provided)
- Number of base pairs: A, C, G, T, N, Total and Total_not_N
- Number of sequences
- GC content

In addition, the optional gap stats BED file contains information about the location of gaps.
    ]]>
    </help>
    <!--
        Citation for this tool, given as a BibTeX entry of type UNPUBLISHED
        whose url field points to the tools-iuc repository.
    -->
    <citations>
        <citation type="bibtex">
            @UNPUBLISHED{Anmol_Kyran2021,
                author = {Anmol Kyran},
                title = {Fasta Statistics: Display summary statistics for a fasta file.},
                year = {2021},
                url = {https://github.com/galaxyproject/tools-iuc},
            }
        </citation>
    </citations>
</tool>