comparison dereplication/dereplicate.xml @ 0:88fc52f1c5db draft default tip

Uploaded
author qfab
date Wed, 28 May 2014 20:34:11 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:88fc52f1c5db
1 <tool id="usearch_derep_full" name="Dereplicate" version="1.0.0">
2 <description>Remove duplicate sequences</description>
3 <!-- Cheetah template: runs usearch dereplication in the selected mode; usearch's stderr is merged into stdout; an unknown mode fails with exit status 1. --> <command>
4 #if str($mode) == "fulllength"
5 usearch -derep_fulllength '$input' -output '$output' -sizeout 2&gt;&amp;1;
6 #elif str($mode) == "prefix"
7 usearch -derep_prefix '$input' -output '$output' -sizeout 2&gt;&amp;1;
8 #else
9 echo 'Unrecognised mode:' '$mode' &gt;&amp;2;
10 echo ' [fulllength|prefix] only' &gt;&amp;2; exit 1;
11 #end if
12 </command>
13 <inputs>
14 <param name="input" type="data" format="fasta" label="Input sequence file"/>
15 <param name="mode" type="select" label="Criteria used for duplicate detection">
16 <option value="fulllength">Full length</option>
17 <option value="prefix">Prefix</option>
18 </param>
19 </inputs>
20
21 <outputs>
22 <data name="output" format="fasta"/>
23 </outputs>
24
25 <help>
26 ===========
27 Description
28 ===========
29
30 Removes duplicate sequences using one of two modes (below), from the Usearch-Tool-Suite_.
31
32 .. _Usearch-Tool-Suite: http://www.drive5.com/usearch/
33
34 -----
35
36 -----
37 Input
38 -----
39
40 File of reads in FASTA format.
41
42 ----------
43 Parameters
44 ----------
45
46 Full length
47 Matching is performed over the full length of the sequences; all identical sequences except one are removed.
48 Prefix
49 A sequence (A) is discarded if it is a prefix of another sequence (B), i.e. the whole of A is identical to the first part of B.
50
51 ------
52 Output
53 ------
54
55 A FASTA file containing only unique sequences according to the criteria chosen for the duplicate detection. The identifier line for each sequence states the representative sequence followed by the number of identical sequences found.
56
57 e.g. >sequenceXXXX;size=1443;
58
59 sequenceXXXX is the representative of 1443 identical sequences.
60
61 -----
62
63 =========
64 Resources
65 =========
66
67 Dereplication_
68
69 .. _Dereplication: http://drive5.com/usearch/manual/dereplication.html
70
71 **Author**
72
73 Robert C. Edgar (bob@drive5.com)
74
75 **Wrapper Author**
76
77 QFAB Bioinformatics (support@qfab.org)
78 </help>
79 <!-- Functional test: runs the tool on seqs.fasta in "fulllength" mode and compares the result with seqs_derep.fasta; lines_diff="10" presumably tolerates up to 10 differing lines (confirm against the Galaxy test documentation). --> <tests>
80 <test>
81 <param name="input" value="seqs.fasta" />
82 <param name="mode" value="fulllength" />
83 <output name="output" file="seqs_derep.fasta" ftype="fasta" lines_diff="10" />
84 </test>
85 </tests>
86 </tool>