<!--
view dereplication.xml @ 1:8c4e2933a17a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsearch commit 95732e013ec4dfe5dae0b9ed81e9d7710cbaed9d
author iuc
date Wed, 26 Aug 2015 13:34:22 -0400
parents fae6527990af
children f29e21388219
line wrap: on
line source
-->

<tool id="vsearch_dereplication" name="VSearch dereplication" version="@VERSION@.1">
    <description></description>
    <macros>
        <import>vsearch_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version_command" />
    <!--
        Command template (Cheetah). Builds a single vsearch invocation that
        dereplicates the selected FASTA dataset. The optional numeric limits are
        only passed when the user supplied a value, and the UCLUST-like report is
        only requested when the "uc" option is switched on.
    -->
    <command>
<![CDATA[
    vsearch
        @GENERAL@
        ## Dataset paths are single-quoted so the shell always sees one argument.
        --derep_fulllength '$infile'
        ## Optional integers are tested through str(): an unset value renders as
        ## an empty string, whereas a plain truth test would also drop an explicit 0.
        #if str($maxuniquesize):
            --maxuniquesize $maxuniquesize
        #end if
        #if str($minuniquesize):
            --minuniquesize $minuniquesize
        #end if
        --output '$outfile'
        $sizein
        $sizeout
        --strand $strand
        #if str($topn):
            --topn $topn
        #end if
        #if $uc:
            --uc '$uc_outfile'
        #end if
]]>
    </command>
    <!--
        User-facing parameters: the FASTA dataset to dereplicate, a set of shared
        option macros (presumably defined in the imported vsearch_macros.xml), and
        two optional abundance bounds that are left empty by default.
    -->
    <inputs>
        <!-- Sequences to dereplicate. -->
        <param name="infile"
               type="data"
               format="fasta"
               label="Select your FASTA file"
               help="(--derep_fulllength)" />

        <!-- Shared option macros; their expansion is not visible in this file. -->
        <expand macro="topn" />
        <expand macro="sizein" />
        <expand macro="sizeout" />
        <expand macro="strand" />
        <expand macro="uclust_like_output" />

        <!-- Optional lower abundance bound; no value means no limit is passed. -->
        <param name="minuniquesize"
               type="integer"
               value=""
               optional="True"
               label="Minimum abundance"
               help="(--minuniquesize)" />

        <!-- Optional upper abundance bound; no value means no limit is passed. -->
        <param name="maxuniquesize"
               type="integer"
               value=""
               optional="True"
               label="Maximum abundance"
               help="(--maxuniquesize)" />
    </inputs>
    <!--
        Outputs: the dereplicated FASTA is always produced; the UCLUST-like
        report is only created when the "uc" option is enabled (see its filter).
        That report is a tab-separated table rather than sequence data, so it is
        typed as "tabular" instead of "fasta".
    -->
    <outputs>
        <data name="outfile" format="fasta" label="${tool.name} on ${on_string}" />
        <data name="uc_outfile" format="tabular" label="${tool.name} on ${on_string}: UCLUST like output">
            <filter>uc is True</filter>
        </data>
    </outputs>
    <tests>
        <!-- Both strands, explicit abundance bounds and an explicit topn limit. -->
        <test>
            <param name="infile" value="AF091148_first_rep.fsa.bz2" ftype="fasta" />
            <param name="strand" value="both" />
            <param name="minuniquesize" value="1" />
            <param name="maxuniquesize" value="100000" />
            <param name="topn" value="10000" />
            <output name="outfile" file="dereplication_result1.fasta" ftype="fasta" />
        </test>
        <!-- Abundance annotation written to the output, topn left unset. -->
        <test>
            <param name="infile" value="AF091148_first_rep.fsa.bz2" ftype="fasta" />
            <param name="strand" value="both" />
            <param name="minuniquesize" value="1" />
            <param name="maxuniquesize" value="100000" />
            <param name="sizeout" value="--sizeout"/>
            <param name="topn" value="" />
            <output name="outfile" file="dereplication_result2.fasta" ftype="fasta" />
        </test>
        <!-- Same as above with topn set and the UCLUST-like report requested. -->
        <test>
            <param name="infile" value="AF091148_first_rep.fsa.bz2" ftype="fasta" />
            <param name="strand" value="both" />
            <param name="minuniquesize" value="1" />
            <param name="maxuniquesize" value="100000" />
            <param name="sizeout" value="--sizeout"/>
            <param name="topn" value="10000" />
            <param name="uc" value="--uc" />
            <output name="outfile" file="dereplication_result2.fasta" ftype="fasta" />
            <!-- The expected-data file keeps its historical name; only the datatype changed. -->
            <output name="uc_outfile" file="dereplication_uc_result3.fasta" ftype="tabular" />
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

Merge strictly identical sequences contained in the input FASTA file. Identical sequences
are defined as having the same length and the same string of nucleotides (case insensitive,
T and U are considered the same).

Dereplication options
  --derep_fulllength FILENAME  dereplicate sequences in the given FASTA file
  --maxuniquesize INT          maximum abundance for output from dereplication
  --minuniquesize INT          minimum abundance for output from dereplication
  --output FILENAME            output FASTA file
  --sizein                     read abundance annotation from input
  --sizeout                    write abundance annotation to output
  --strand                     dereplicate "plus" or "both" strands (default: plus)
  --topn INT                   output just the n most abundant sequences
  --uc FILENAME                filename for UCLUST-like output


@EXTERNAL_DOCUMENTATION@

-------

@REFERENCES@


]]>
    </help>
    <expand macro="citations" />
</tool>