view jellyfish.xml @ 1:11658afb8c87 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jellyfish commit 173be51ac6f23888ad4065877762b67e4cc9ed6e
author iuc
date Wed, 01 Nov 2023 09:32:45 +0000
parents b4038a4d48f5
children dcd30574a456
line wrap: on
line source

<tool id="jellyfish" name="jellyfish" version="@WRAPPER_VERSION@+@VERSION_SUFFIX@" profile="20.01">
    <macros>
        <!-- Tokens substituted wherever @NAME@ appears in this file:
             @WRAPPER_VERSION@ is used in the tool version attribute and as the
             version of the package requirement; @VERSION_SUFFIX@ is appended to
             the tool version after a "+". -->
        <token name="@WRAPPER_VERSION@">2.3.0</token>
        <token name="@VERSION_SUFFIX@">galaxy0</token>
    </macros>
    <xrefs>
        <!-- Cross-reference of type bio.tools for this tool. -->
        <xref type="bio.tools">Jellyfish</xref>
    </xrefs>
    <requirements>
        <!-- Package providing the jellyfish executable called in <command>;
             its version is tied to the @WRAPPER_VERSION@ token. -->
        <requirement type="package" version="@WRAPPER_VERSION@">kmer-jellyfish</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Builds one jellyfish invocation. The sub-command (count, histo or dump)
        ## and its options are taken from the "commands" conditional in <inputs>.
        ## Optional parameters are only emitted when a value was supplied.
        jellyfish
        #if $commands.command == "count":
            ## No -o is passed here; <outputs> collects mer_counts.jf from the
            ## working directory.
            count
            --mer-len $commands.mer_len
            --size $commands.size
            #if $commands.counter_len:
                --counter-len $commands.counter_len
            #end if
            ## Boolean parameter: expands to "--canonical" or to nothing.
            $commands.canonical
            #if $commands.bloom.filters == "bc":
                --bc '$commands.bloom.bloom_file'
                #if $commands.bloom.bf_fp:
                    --bf-fp $commands.bloom.bf_fp
                #end if
            #else if $commands.bloom.filters == "bf_size":
                ## bf_size is declared optional, so it must be guarded: an empty
                ## value would otherwise leave a bare --bf-size that swallows the
                ## following argument.
                #if $commands.bloom.bf_size:
                    --bf-size $commands.bloom.bf_size
                #end if
                #if $commands.bloom.bf_fp:
                    --bf-fp $commands.bloom.bf_fp
                #end if
            #end if
            #if $commands.if:
                --if '$commands.if'
            #end if
            #if $commands.reprobes:
                --reprobes $commands.reprobes
            #end if
            ## The "text" boolean renders as "text" (checked) or "jf" (unchecked).
            #if $commands.text == "text":
                --text
            #end if
            #if $commands.lower_count:
                --lower-count $commands.lower_count
            #end if
            #if $commands.upper_count:
                --upper-count $commands.upper_count
            #end if
            '$commands.input'
        #else if $commands.command == "histo":
            histo
            #if $commands.low:
                --low $commands.low
            #end if
            #if $commands.high:
                --high $commands.high
            #end if
            #if $commands.increment:
                --increment $commands.increment
            #end if
            ## Boolean parameter: expands to "--full" or to nothing.
            $commands.full
            '$commands.input'
            -o jellyfish_histo.txt
        #else if $commands.command == "dump":
            dump
            #if $commands.lower_count:
                --lower-count $commands.lower_count
            #end if
            #if $commands.upper_count:
                --upper-count $commands.upper_count
            #end if
            ## The output file name depends on the chosen format so that the
            ## matching <data> element in <outputs> can pick it up.
            #if $commands.format.format_select == "column":
                --column
                $commands.format.tab
                '$commands.input'
                #if $commands.format.tab == "--tab":
                    -o jellyfish_dump.tsv
                #else:
                    -o jellyfish_dump.txt
                #end if
            #else:
                '$commands.input'
                -o jellyfish_dump.fasta
            #end if
        ## Sub-commands not wrapped yet:
        ## #else if $commands.command == "bc":
        ## #else if $commands.command == "info":
        ## #else if $commands.command == "stats":
        ## #else if $commands.command == "merge":
        ## #else if $commands.command == "query":
        #end if
    ]]></command>
    <inputs>
        <!-- Top-level switch: selects the jellyfish sub-command and with it the
             set of parameters shown to the user. Parameters declared with
             argument="--some-name" are referenced as some_name in <command>. -->
        <conditional name="commands">
            <param name="command" type="select" label="Jellyfish command: ">
                <option value="count"/>
                <option value="histo"/>
                <option value="dump"/>
                <!-- <option value="bc"/>
                <option value="info"/>
                <option value="stats"/>
                <option value="merge"/>
                <option value="query"/> -->
            </param>
            <!-- count: k-mer counting on a sequence file -->
            <when value="count">
                <param name="input" type="data" format="fasta,fastq,fastqsanger" label="Input data file"/>
                <param argument="--mer-len" type="integer" value="20" min="1" label="Length of mer"/>
                <param argument="--size" type="integer" value="32" min="1" label="Initial hash size"/>
                <param argument="--counter-len" type="integer" min="1" optional="true" label="Counter Length in bits"/>
                <param argument="--canonical" type="boolean" truevalue="--canonical" falsevalue="" label="Count both strands (canonical representation)"/>
                <conditional name="bloom">
                    <param name="filters" type="select" label="Use bloom filters">
                        <option value="default">Default</option>
                        <option value="bc">Bloom counter to filter out singleton mers</option>
                        <option value="bf_size">Use bloom filter to count high-frequency mers</option>
                    </param>
                    <when value="default"/>
                    <when value="bc">
                        <!-- NOTE(review): this file is passed to "bc"; confirm that
                             tabular/tsv is the right datatype for a bloom counter file. -->
                        <param name="bloom_file" type="data" format="tabular,tsv" label="Bloom counter file"/>
                        <param argument="--bf-fp" type="float" label="False positive rate of bloom filter" optional="true"/>
                    </when>
                    <when value="bf_size">
                        <param name="bf_size" argument="--bf-size" type="integer" label="Use bloom filter to count high-frequency mers" min="1" optional="true"/>
                        <param argument="--bf-fp" type="float" label="False positive rate of bloom filter" optional="true"/>
                    </when>
                </conditional>
                <param argument="--if" type="data" format="txt" label="Count only k-mers in this file" optional="true"/>
                <param argument="--reprobes" type="integer" label="Maximum number of reprobes" min="1" optional="true"/>
                <!-- Rendered as "text" or "jf" in <command>; also selects which of
                     the two count outputs is created (see the <outputs> filters). -->
                <param argument="--text" type="boolean" label="Dump in text format" truevalue="text" falsevalue="jf"/>
                <param argument="--lower-count" type="integer" label="Lower count" min="1" optional="true" help="Don't output k-mer with count less than this value"/>
                <param argument="--upper-count" type="integer" label="Upper count" min="1" optional="true" help="Don't output k-mer with count greater than this value"/>
            </when>
            <!-- histo: histogram of k-mer occurrences from a jellyfish database -->
            <when value="histo">
                <param name="input" type="data" format="jellyfish" label="Input data file"/>
                <param argument="--low" type="integer" min="1" label="Lower count value of histogram" optional="true"/>
                <param argument="--high" type="integer" min="1" label="Upper count value of histogram" optional="true"/>
                <param argument="--increment" type="integer" min="1" label="Increment value for buckets" optional="true"/>
                <param argument="--full" type="boolean" label="Full histogram. Don't skip count 0" truevalue="--full" falsevalue=""/>
            </when>
            <!-- dump: export a jellyfish database as FASTA or as columns -->
            <when value="dump">
                <param name="input" type="data" format="jellyfish" label="Input data file"/>
                <conditional name="format">
                    <param name="format_select" type="select" label="Output format">
                        <option value="fasta"/>
                        <option value="column"/>
                    </param>
                    <when value="fasta"/>
                    <when value="column">
                        <param argument="--tab" type="boolean" label="Use tab separator" truevalue="--tab" falsevalue=""/>
                    </when>
                </conditional>
                <param argument="--lower-count" type="integer" min="0" optional="true" label="Don't output k-mer with count less than lower-count"/>
                <param argument="--upper-count" type="integer" min="1" optional="true" label="Don't output k-mer with count greater than upper-count"/>
            </when>
            <!-- Sub-commands not wrapped yet:
            <when value="bc">
            </when>
            <when value="info">

            </when>
            <when value="stats">

            </when>
            <when value="merge">

            </when>
            <when value="query">

            </when> -->
        </conditional>
    </inputs>
    <outputs>
        <!-- Exactly one output is produced per run; each <filter> keys on the
             selected sub-command and, where relevant, on its format options. -->
        <!-- count: both count outputs are collected from mer_counts.jf; the
             "text" checkbox decides which one is created and hence its datatype. -->
        <data name="jellyfish_db" format="jellyfish" from_work_dir="mer_counts.jf" label="${tool.name} on ${on_string}: jellyfish db">
            <filter>commands["command"] == "count" and not commands["text"]</filter>
        </data>
        <data name="jellyfish_db_txt" format="txt" from_work_dir="mer_counts.jf" label="${tool.name} on ${on_string}: jellyfish db text">
            <filter>commands["command"] == "count" and commands["text"]</filter>
        </data>
        <!-- histo: file name matches the -o argument in <command>. -->
        <data name="jellyfish_histo" format="txt" from_work_dir="jellyfish_histo.txt" label="${tool.name} on ${on_string}: histogram">
            <filter>commands["command"] == "histo"</filter>
        </data>
        <!-- dump: one output per format; file names match the -o arguments
             chosen in <command> (fasta, space-separated columns, tab-separated columns). -->
        <data name="jellyfish_fasta" format="fasta" from_work_dir="jellyfish_dump.fasta" label="${tool.name} on ${on_string}: fasta dump">
            <filter>commands["command"] == "dump" and commands["format"]["format_select"] == "fasta"</filter>
        </data>
        <data name="jellyfish_txt" format="txt" from_work_dir="jellyfish_dump.txt" label="${tool.name} on ${on_string}: txt dump">
            <filter>commands["command"] == "dump" and commands["format"]["format_select"] == "column" and not commands["format"]["tab"]</filter>
        </data>
        <data name="jellyfish_tsv" format="tsv" from_work_dir="jellyfish_dump.tsv" label="${tool.name} on ${on_string}: tabular dump">
            <filter>commands["command"] == "dump" and commands["format"]["format_select"] == "column" and commands["format"]["tab"]</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: count with canonical k-mers, binary database output.
             The selector parameter is "command" (the enclosing conditional is
             "commands"). -->
        <test expect_num_outputs="1">
            <conditional name="commands">
                <param name="command" value="count"/>
                <param name="input" value="test.fastq"/>
                <param name="mer_len" value="22"/>
                <param name="size" value="16"/>
                <param name="canonical" value="--canonical"/>
            </conditional>
            <output name="jellyfish_db" file="out.jf" compare="sim_size" delta="1000"/>
        </test>
        <!-- Test 2: count with a bloom filter, text output and count bounds.
             The bounds are addressed by their real parameter names
             (lower_count / upper_count) so they actually reach the command line. -->
        <test expect_num_outputs="1">
            <conditional name="commands">
                <param name="command" value="count"/>
                <param name="input" value="test.fastq"/>
                <param name="mer_len" value="22"/>
                <param name="size" value="16"/>
                <param name="canonical" value="--canonical"/>
                <conditional name="bloom">
                    <param name="filters" value="bf_size"/>
                    <param name="bf_size" value="2"/>
                    <param name="bf_fp" value="0.01"/>
                </conditional>
                <param name="text" value="text"/>
                <param name="lower_count" value="1"/>
                <param name="upper_count" value="20"/>
            </conditional>
            <output name="jellyfish_db_txt">
                <assert_contents>
                    <has_line line="GTGATCGAGAGCCACAGCGTTA 1"/>
                    <has_line line="GCCTAGTCGGGATCGTCACCTA 1"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: histogram with all histo options set. -->
        <test expect_num_outputs="1">
            <conditional name="commands">
                <param name="command" value="histo"/>
                <param name="input" value="out.jf" ftype="jellyfish"/>
                <param name="low" value="1"/>
                <param name="high" value="10000"/>
                <param name="full" value="--full"/>
                <param name="increment" value="100"/>
            </conditional>
            <output name="jellyfish_histo" ftype="txt" file="histo.txt"/>
        </test>
        <!-- Test 4: dump in the default (fasta) format with a lower count bound. -->
        <test expect_num_outputs="1">
            <conditional name="commands">
                <param name="command" value="dump"/>
                <param name="input" value="out.jf" ftype="jellyfish"/>
                <param name="lower_count" value="3"/>
            </conditional>
            <output name="jellyfish_fasta" ftype="fasta" file="dump.fasta"/>
        </test>
        <!-- Test 5: dump as tab-separated columns with an upper count bound. -->
        <test expect_num_outputs="1">
            <conditional name="commands">
                <param name="command" value="dump"/>
                <param name="input" value="out.jf" ftype="jellyfish"/>
                <conditional name="format">
                    <param name="format_select" value="column"/>
                    <param name="tab" value="--tab"/>
                </conditional>
                <param name="upper_count" value="100"/>
            </conditional>
            <output name="jellyfish_tsv" ftype="tsv">
                <assert_contents>
                    <has_line line="GTGATCGAGAGCCACAGCGTTA&#9;1"/>
                    <has_line line="GCCTAGTCGGGATCGTCACCTA&#9;1"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
        Jellyfish is a tool for fast, memory-efficient counting of k-mers in DNA. A k-mer is a substring of length k, and counting the occurrences of all such substrings is a central step in many analyses of DNA sequence. Jellyfish can count k-mers using an order of magnitude less memory and an order of magnitude faster than other k-mer counting packages by using an efficient encoding of a hash table and by exploiting the "compare-and-swap" CPU instruction to increase parallelism.

        JELLYFISH is a command-line program that reads FASTA and multi-FASTA files containing DNA sequences. It outputs its k-mer counts in a binary format, which can be translated into a human-readable text format using the "jellyfish dump" command, or queried for specific k-mers with "jellyfish query". See the documentation for details.
    ]]></help>
    <citations>
        <!-- DOI to cite when using this tool (presumably the Jellyfish publication; verify). -->
        <citation type="doi">10.1093/bioinformatics/btr011</citation>
    </citations>
</tool>