annotate dereplication/dereplicate.xml @ 0:88fc52f1c5db draft default tip

Uploaded
author qfab
date Wed, 28 May 2014 20:34:11 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
1 <tool id="usearch_derep_full" name="Dereplicate" version="1.0.0">
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
2 <description>Remove duplicate sequences</description>
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
3 <!-- Cheetah template: runs the usearch dereplication command chosen by $mode, writing to $output with size annotations; stderr is merged into stdout. Conditions use plain Python comparisons, because shell-style [ ... ] brackets form a non-empty list that is always true. --> <command>
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
4 #if str($mode) == "fulllength"
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
5 usearch -derep_fulllength '$input' -output '$output' -sizeout 2&gt;&amp;1;
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
6 #elif str($mode) == "prefix"
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
7 usearch -derep_prefix '$input' -output '$output' -sizeout 2&gt;&amp;1;
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
8 #else
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
9 echo 'Unrecognised mode:' $mode;
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
10 echo ' [fulllength|prefix] only'; exit 1;
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
11 #end if
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
12 </command>
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
13 <inputs> <!-- Parameters referenced as $input and $mode by the command template. -->
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
14 <param name='input' type='data' format='fasta' label='Input sequence file' /> <!-- FASTA dataset handed to usearch as the sequences to dereplicate. -->
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
15 <param name='mode' type='select' label='Criteria used for duplicate detection'> <!-- The option values below are the strings the command template compares $mode against. -->
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
16 <option value='fulllength'>Full length</option>
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
17 <option value='prefix'>Prefix</option>
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
18 </param>
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
19 </inputs>
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
20
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
21 <outputs> <!-- Single result dataset produced by the tool. -->
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
22 <data name='output' format='fasta' /> <!-- FASTA file that usearch writes via its -output option ($output in the command template). -->
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
23 </outputs>
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
24
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
25 <help>
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
26 ===========
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
27 Description
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
28 ===========
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
29
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
30 Removes duplicate sequences using one of the two modes described below. This tool is part of the Usearch-Tool-Suite_.
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
31
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
32 .. _Usearch-Tool-Suite: http://www.drive5.com/usearch/
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
33
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
34 -----
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
35
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
36 -----
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
37 Input
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
38 -----
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
39
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
40 File of reads in FASTA format.
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
41
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
42 ----------
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
43 Parameters
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
44 ----------
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
45
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
46 Full length
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
47 Matching is performed over the full length of the sequences; all identical sequences except one are removed.
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
48 Prefix
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
49 A sequence (A) is discarded if it is a prefix of another sequence (B), i.e. if A is identical to the first part of B.
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
50
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
51 ------
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
52 Output
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
53 ------
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
54
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
55 A FASTA file containing only unique sequences according to the criteria chosen for the duplicate detection. The identifier line for each sequence states the representative sequence followed by the number of identical sequences found.
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
56
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
57 e.g. >sequenceXXXX;size=1443;
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
58
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
59 sequenceXXXX is the representative of 1443 identical sequences.
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
60
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
61 -----
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
62
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
63 =========
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
64 Resources
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
65 =========
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
66
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
67 Dereplication_
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
68
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
69 .. _Dereplication: http://drive5.com/usearch/manual/dereplication.html
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
70
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
71 **Author**
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
72
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
73 Robert C. Edgar (bob@drive5.com)
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
74
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
75 **Wrapper Author**
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
76
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
77 QFAB Bioinformatics (support@qfab.org)
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
78 </help>
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
79 <tests> <!-- Functional test definitions for this tool. -->
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
80 <test> <!-- Runs full-length dereplication on seqs.fasta and compares the result with seqs_derep.fasta. -->
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
81 <param name="input" value="seqs.fasta" />
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
82 <param name="mode" value="fulllength" />
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
83 <output name="output" file="seqs_derep.fasta" ftype="fasta" lines_diff="10" /> <!-- lines_diff permits up to 10 differing lines between the produced and expected files. -->
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
84 </test>
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
85 </tests>
88fc52f1c5db Uploaded
qfab
parents:
diff changeset
86 </tool>