Mercurial > repos > iuc > vsearch

<macros>
    <!-- Declares the vsearch package dependency. The version is taken from the
         @VERSION@ token defined in this file, so the pinned package and the
         advertised version cannot drift apart. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@VERSION@">vsearch</requirement>
        </requirements>
    </xml>
    <!-- Shell pipeline used to report the installed vsearch version: keeps the
         first output line, prints its second whitespace-separated field and
         removes the first comma from it. -->
    <xml name="version_command">
        <version_command><![CDATA[vsearch --version | head -n 1 | awk '{print $2}' | sed 's/,//']]></version_command>
    </xml>
    <!-- Error detection rules: any exit code of 1 or above, or of -1 or below,
         is matched, as is any output containing "Error:" or "Exception:". -->
    <xml name="stdio">
        <stdio>
            <!-- non-zero exit codes, positive and negative -->
            <exit_code range="1:"/>
            <exit_code range=":-1"/>
            <!-- textual error markers -->
            <regex match="Error:"/>
            <regex match="Exception:"/>
        </stdio>
    </xml>

    <!-- Optional integer parameter "topn" (no default value); its help text
         names the corresponding vsearch option. -->
    <xml name="topn">
        <param
            name="topn"
            type="integer"
            optional="True"
            value=""
            label="Output just first n sequences"
            help="(--topn)"/>
    </xml>

    <!-- Integer parameter "maxaccepts", default 1. -->
    <xml name="maxaccepts">
        <param
            name="maxaccepts"
            type="integer"
            value="1"
            label="Number of hits to accept and show per strand"
            help="(--maxaccepts)"/>
    </xml>
    <!-- Integer parameter "maxrejects", default 32. -->
    <xml name="maxrejects">
        <param
            name="maxrejects"
            type="integer"
            value="32"
            label="Number of non-matching hits to consider"
            help="(--maxrejects)"/>
    </xml>
    <!-- Select parameter "qmask" offering three masking modes; "dust" is the
         preselected choice. -->
    <xml name="qmask">
        <param
            name="qmask"
            type="select"
            label="Mask sequences"
            help="(--qmask)">
            <option value="none">No masking</option>
            <option value="dust" selected="True">dust</option>
            <option value="soft">soft</option>
        </param>
    </xml>

    <!-- Boolean parameter "hardmask": yields the hardmask command-line flag
         when checked and an empty string otherwise; unchecked by default. -->
    <xml name="hardmask">
        <param
            name="hardmask"
            type="boolean"
            checked="False"
            truevalue="--hardmask"
            falsevalue=""
            label="Mask by replacing with N instead of lower case"
            help="(--hardmask)"/>
    </xml>

    <!-- Identity settings: "iddef" selects which identity definition is used
         (value 2 is preselected) and "id" is the optional minimum identity
         threshold. The threshold is a fraction, so it is restricted to the
         range 0.0 to 1.0 and invalid values are rejected in the form instead
         of failing at run time. -->
    <xml name="id_and_iddef">
        <param name="iddef" type="select" label="ID definition" help="(--iddef)">
            <option value="0">CD-HIT</option>
            <option value="1">all</option>
            <option value="2" selected="True">int</option>
            <option value="3">MBL</option>
            <option value="4">BLAST</option>
        </param>
        <param name="id" type="float" value="" min="0.0" max="1.0" optional="True"
            label="Reject hit if identity is lower than this value"
            help="Fraction between 0.0 and 1.0 (--id)"/>
    </xml>

    <!-- Two unchecked-by-default boolean parameters; each yields its
         command-line flag when checked and an empty string otherwise. -->
    <xml name="self_and_selfid">
        <param
            name="self_param"
            type="boolean"
            checked="False"
            truevalue="--self"
            falsevalue=""
            label="Exclude identical labels for --uchime_ref"
            help="(--self)"/>
        <param
            name="selfid_param"
            type="boolean"
            checked="False"
            truevalue="--selfid"
            falsevalue=""
            label="Exclude identical sequences for --uchime_ref"
            help="(--selfid)"/>
    </xml>

    <!-- Select parameter "strand" with the values "plus" (preselected) and
         "both". -->
    <xml name="strand">
        <param
            name="strand"
            type="select"
            label="Strand specific clustering"
            help="(--strand)">
            <option value="plus" selected="True">Plus strand</option>
            <option value="both">Both strands</option>
        </param>
    </xml>

    <!-- Boolean parameter "sizein": yields the sizein flag when checked and an
         empty string otherwise; unchecked by default. -->
    <xml name="sizein">
        <param
            name="sizein"
            type="boolean"
            checked="False"
            truevalue="--sizein"
            falsevalue=""
            label="Read abundance annotation from input"
            help="(--sizein)"/>
    </xml>

    <!-- Boolean parameter "sizeout": yields the sizeout flag when checked and
         an empty string otherwise; unchecked by default. -->
    <xml name="sizeout">
        <param
            name="sizeout"
            type="boolean"
            checked="False"
            truevalue="--sizeout"
            falsevalue=""
            label="Write cluster abundances to centroid file"
            help="(--sizeout)"/>
    </xml>

    <!-- Boolean parameter "uc": yields the uc flag when checked and an empty
         string otherwise; unchecked by default. -->
    <xml name="uclust_like_output">
        <param
            name="uc"
            type="boolean"
            checked="False"
            truevalue="--uc"
            falsevalue=""
            label="UCLUST-like output"
            help="(--uc)"/>
    </xml>

    <!-- Command-line fragment substituted wherever @GENERAL@ appears. It sets
         the thread count from the GALAXY_SLOTS environment variable, with a
         shell default of 4 when that variable is unset or empty, and passes the
         notrunclabels flag. NOTE(review): the backslash before the dollar sign
         presumably keeps the template engine from treating the shell expansion
         as a template variable; confirm before changing it. -->
    <token name="@GENERAL@">
        --threads "\${GALAXY_SLOTS:-4}"
        --notrunclabels
    </token>
    <!-- Command-line fragment substituted wherever @USERFIELDS@ appears. When
         the user enabled the user-defined tabular output, the selected field
         names (comma-separated in the parameter value) are re-joined with "+"
         and passed as the userfields argument, and the output dataset path is
         passed as the userout argument. The dataset path is quoted so the shell
         always treats it as a single argument. -->
    <token name="@USERFIELDS@">
        #if $userfields_output.userfields_output_select == 'yes':
            --userfields '#echo '+'.join( str($userfields_output.userfields).split(',') )#'
            --userout '$userout'
        #end if
    </token>
    <!-- Tabular output dataset "userout"; the filter keeps it only when the
         user-defined tabular output selector is set to "yes". -->
    <xml name="userfields_output">
        <data
            name="userout"
            format="tabular"
            label="${tool.name} on ${on_string}: tabular output">
            <filter>userfields_output.userfields_output_select == 'yes'</filter>
        </data>
    </xml>

    <!-- Conditional "userfields_output": a yes/no selector (default "no") that,
         when set to "yes", exposes a multi-select of the columns to write to
         the user-defined tabular output. The fields evalue, query and target
         are preselected. Option labels are kept distinct for fields that differ
         only in how gaps are treated (id1/id2, qhi/qihi, qlo/qilo, thi/tihi,
         tlo/tilo) so they can be told apart in the form. -->
    <xml name="userfields">
        <conditional name="userfields_output">
            <param name="userfields_output_select" type="select" label="User defined tabular output" help="(--userfields)">
                <option value="yes">Yes</option>
                <option value="no" selected="True">No</option>
            </param>
            <when value="yes">
                <param name="userfields" type="select" multiple="True" label="Fields in tabular output" help="For more information about the columns please scroll down to the tool help. (--userfields)">
                    <option value="aln">Print a string of M (match), D (delete, i.e. a gap in the query) and I (insert, i.e. a gap in the target) representing the pairwise alignment</option>
                    <option value="alnlen">Print the length of the query-target alignment (number of columns)</option>
                    <option value="bits">Bit score (not computed for nucleotide alignments)</option>
                    <option value="caln">Compact representation of the pairwise alignment using the CIGAR format (Compact Idiosyncratic Gapped Alignment Report): M (match), D (deletion) and I (insertion)</option>
                    <option value="evalue" selected="True">E-value (not computed for nucleotide alignments)</option>
                    <option value="exts">Number of columns containing a gap extension (zero or positive integer value)</option>
                    <option value="gaps">Number of columns containing a gap (zero or positive integer value)</option>
                    <option value="id">Percentage of identity</option>
                    <option value="id0">CD-HIT definition of the percentage of identity</option>
                    <option value="id1">The percentage of identity (edit distance)</option>
                    <option value="id2">The percentage of identity (edit distance, excluding terminal gaps)</option>
                    <option value="id3">Marine Biological Lab definition of the percentage of identity</option>
                    <option value="id4">BLAST definition of the percentage of identity</option>
                    <option value="ids">Number of matches in the alignment</option>
                    <option value="mism">Number of mismatches in the alignment</option>
                    <option value="opens">Number of columns containing a gap opening</option>
                    <option value="pairs">Number of columns containing only nucleotides</option>
                    <option value="pctgaps">Number of columns containing gaps expressed as a percentage of the alignment length</option>
                    <option value="pctpv">Percentage of positive columns. When working with nucleotide sequences, this is equivalent to the percentage of matches</option>
                    <option value="pv">Number of positive columns. When working with nucleotide sequences, this is equivalent to the number of matches</option>
                    <option value="qcov">Fraction of the query sequence that is aligned with the target sequence</option>
                    <option value="qframe">Query frame (-3 to +3). That field only concerns coding sequences and is not computed by vsearch</option>
                    <option value="qhi">Last nucleotide of the query aligned with the target</option>
                    <option value="qihi">Last nucleotide of the query aligned with the target (ignoring terminal gaps)</option>
                    <option value="qilo">First nucleotide of the query aligned with the target (ignoring initial gaps)</option>
                    <option value="ql">Query sequence length</option>
                    <option value="qlo">First nucleotide of the query aligned with the target</option>
                    <option value="qrow">Print the sequence of the query segment as seen in the pairwise alignment</option>
                    <option value="qs">Query segment length</option>
                    <option value="qstrand">Query strand orientation (+ or - for nucleotide sequences)</option>
                    <option value="query" selected="True">Query label</option>
                    <option value="raw">Raw alignment score (negative, null or positive integer value)</option>
                    <option value="target" selected="True">Target label</option>
                    <option value="tcov">Fraction of the target sequence that is aligned with the query sequence</option>
                    <option value="tframe">Target frame (-3 to +3) - not computed by vsearch</option>
                    <option value="thi">Last nucleotide of the target aligned with the query</option>
                    <option value="tihi">Last nucleotide of the target aligned with the query (ignoring terminal gaps)</option>
                    <option value="tilo">First nucleotide of the target aligned with the query (ignoring initial gaps)</option>
                    <option value="tl">Target sequence length (positive integer value)</option>
                    <option value="tlo">First nucleotide of the target aligned with the query</option>
                    <option value="trow">Print the sequence of the target segment as seen in the pairwise alignment</option>
                    <option value="ts">Target segment length</option>
                    <option value="tstrand">Target strand orientation (+ or - for nucleotide sequences)</option>
                </param>
            </when>
            <when value="no" />
        </conditional>
    </xml>

    <!-- Multi-select "outputs" listing the output files a tool can write; the
         blast6out entry is preselected. Including tools can append further
         options at the yield point, and the validator rejects an empty
         selection. -->
    <xml name="general_output">
        <param
            name="outputs"
            type="select"
            multiple="True"
            label="Select output files"
            help="">
            <option value="--alnout">Human-readable alignment output</option>
            <option value="--blast6out" selected="True">Blast-like tab-separated output</option>
            <option value="--fastapairs">Write query/target pairs of sequences</option>
            <yield/>
            <validator type="no_options" message="Please select at least one output."/>
        </param>
    </xml>

    <!-- Version string substituted wherever @VERSION@ appears. -->
    <token name="@VERSION@">1.1.3</token>
    <token name="@EXTERNAL_DOCUMENTATION@">
<![CDATA[

For details about this tool, please refer to the `GitHub repository <https://github.com/torognes/vsearch>`_ or the `vsearch manual <https://github.com/torognes/vsearch/raw/master/doc/vsearch_manual.pdf>`_.

]]>
    </token>
    <token name="@USERFIELDS_HELP@">
**Available fields in user defined tabular output**

========= ================
Key       Description
========= ================
aln       Print a string of M (match), D (delete, i.e. a gap in the query) and I (insert, i.e. a gap in the target) representing the pairwise alignment. Empty field if there is no alignment.
alnlen    Print the length of the query-target alignment (number of columns). The field is set to 0 if there is no alignment.
bits      Bit score (not computed for nucleotide alignments). Always set to 0.
caln      Compact representation of the pairwise alignment using the CIGAR format (Compact Idiosyncratic Gapped Alignment Report): M (match), D (deletion) and I (insertion). Empty field if there is no alignment.
evalue    E-value (not computed for nucleotide alignments). Always set to -1.
exts      Number of columns containing a gap extension (zero or positive integer value).
gaps      Number of columns containing a gap (zero or positive integer value).
id        Percentage of identity (real value ranging from 0.0 to 100.0). The percentage identity is defined as 100 * (matching columns) / (alignment length - terminal gaps).
id0       CD-HIT definition of the percentage of identity (real value ranging from 0.0 to 100.0) using the length of the shortest sequence in the pairwise alignment as denominator: 100 * (matching columns) / (shortest sequence length).
id1       The percentage of identity (real value ranging from 0.0 to 100.0) is defined as the edit distance: 100 * (matching columns) / (alignment length).
id2       The percentage of identity (real value ranging from 0.0 to 100.0) is defined as the edit distance, excluding terminal gaps. The field id2 is an alias for the field id.
id3       Marine Biological Lab definition of the percentage of identity (real value ranging from 0.0 to 100.0), counting each extended gap (internal or terminal) as a single difference and using the length of the longest sequence in the pairwise alignment as denominator: 100 * (1.0 - [(mismatches + gaps) / (longest sequence length)]).
id4       BLAST definition of the percentage of identity (real value ranging from 0.0 to 100.0), equivalent to --iddef 2 in a context of global pairwise alignment.
ids       Number of matches in the alignment (zero or positive integer value).
mism      Number of mismatches in the alignment (zero or positive integer value).
opens     Number of columns containing a gap opening (zero or positive integer value).
pairs     Number of columns containing only nucleotides. That value corresponds to the length of the alignment minus the gap-containing columns (zero or positive integer value).
pctgaps   Number of columns containing gaps expressed as a percentage of the alignment length (real value ranging from 0.0 to 100.0).
pctpv     Percentage of positive columns. When working with nucleotide sequences, this is equivalent to the percentage of matches (real value ranging from 0.0 to 100.0).
pv        Number of positive columns. When working with nucleotide sequences, this is equivalent to the number of matches (zero or positive integer value).
qcov      Fraction of the query sequence that is aligned with the target sequence (real value ranging from 0.0 to 100.0). The query coverage is computed as 100.0 * (matches + mismatches) / query sequence length. Internal or terminal gaps are not taken into account. The field is set to 0.0 if there is no alignment.
qframe    Query frame (-3 to +3). That field only concerns coding sequences and is not computed by vsearch. Always set to +0.
qhi       Last nucleotide of the query aligned with the target. Always equal to the length of the pairwise alignment. The field is set to 0 if there is no alignment.
qihi      Last nucleotide of the query aligned with the target (ignoring terminal gaps). Nucleotide numbering starts from 1. The field is set to 0 if there is no alignment.
qilo      First nucleotide of the query aligned with the target (ignoring initial gaps). Nucleotide numbering starts from 1. The field is set to 0 if there is no alignment.
ql        Query sequence length (positive integer value). The field is set to 0 if there is no alignment.
qlo       First nucleotide of the query aligned with the target. Always equal to 1 if there is an alignment, 0 otherwise.
qrow      Print the sequence of the query segment as seen in the pairwise alignment (i.e. with gap insertions if need be). Empty field if there is no alignment.
qs        Query segment length. Always equal to query sequence length.
qstrand   Query strand orientation (+ or - for nucleotide sequences). Empty field if there is no alignment.
query     Query label.
raw       Raw alignment score (negative, null or positive integer value). The score is the sum of match rewards minus mismatch penalties, gap openings and gap extensions. The field is set to 0 if there is no alignment.
target    Target label. The field is set to "*" if there is no alignment.
tcov      Fraction of the target sequence that is aligned with the query sequence (real value ranging from 0.0 to 100.0). The target coverage is computed as 100.0 * (matches + mismatches) / target sequence length. Internal or terminal gaps are not taken into account. The field is set to 0.0 if there is no alignment.
tframe    Target frame (-3 to +3). That field only concerns coding sequences and is not computed by vsearch. Always set to +0.
thi       Last nucleotide of the target aligned with the query. Always equal to the length of the pairwise alignment. The field is set to 0 if there is no alignment.
tihi      Last nucleotide of the target aligned with the query (ignoring terminal gaps). Nucleotide numbering starts from 1. The field is set to 0 if there is no alignment.
tilo      First nucleotide of the target aligned with the query (ignoring initial gaps). Nucleotide numbering starts from 1. The field is set to 0 if there is no alignment.
tl        Target sequence length (positive integer value). The field is set to 0 if there is no alignment.
tlo       First nucleotide of the target aligned with the query. Always equal to 1 if there is an alignment, 0 otherwise.
trow      Print the sequence of the target segment as seen in the pairwise alignment (i.e. with gap insertions if need be). Empty field if there is no alignment.
ts        Target segment length. Always equal to target sequence length. The field is set to 0 if there is no alignment.
tstrand   Target strand orientation (+ or - for nucleotide sequences). Always set to "+", so reverse strand matches have tstrand "+" and qstrand "-". Empty field if there is no alignment.
========= ================

    </token>
    <!-- Citation list containing one DOI entry; including tools can append
         further citations at the yield point. -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.5281/zenodo.15524</citation>
            <yield/>
        </citations>
    </xml>
</macros>
author	iuc
date	Thu, 17 Dec 2015 12:53:39 -0500
parents	8c4e2933a17a
children	4258854759ba