<tool id="vsearch_clustering" name="VSearch clustering" version="@VERSION@.0">
    <!--
        Galaxy wrapper for the vsearch "cluster_fast" command.

        Requirements, stdio handling, the version command, most input
        parameters, the help tokens and the citations are imported from
        vsearch_macros.xml. Each optional result file is requested from
        vsearch, and kept by Galaxy, only when its flag is selected in the
        "outputs" parameter.
    -->
    <description></description>
    <macros>
        <import>vsearch_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version_command" />
    <command>
<![CDATA[
    vsearch
        @GENERAL@
        --cluster_fast "$infile"
        ##--cluster_smallmem FILENAME  cluster sequences using a small amount of memory
        ##--clusters STRING            output each cluster to a separate FASTA file

        #if $maxrejects:
            --maxrejects $maxrejects
        #end if
        #if $maxaccepts:
            --maxaccepts $maxaccepts
        #end if

        $cons_truncate
        --id $id
        ##--iddef $iddef

        ## Optional result files: only ask vsearch for the ones the user selected.
        #if '--msaout' in str($outputs):
            --msaout "$msaout"
        #end if
        #if '--consout' in str($outputs):
            --consout "$consout"
        #end if
        #if '--centroids' in str($outputs):
            --centroids "$centroids"
        #end if
        #if '--alnout' in str($outputs):
            --alnout "$alnout"
        #end if
        #if '--blast6out' in str($outputs):
            --blast6out "$blast6out"
        #end if
        #if '--notmatched' in str($outputs):
            --notmatched "$notmatched"
        #end if
        #if '--fastapairs' in str($outputs):
            --fastapairs "$fastapairs"
        #end if
        #if '--matched' in str($outputs):
            --matched "$matched"
        #end if

        ## Compare the selected value as a string, like the $outputs checks above.
        #if str($qmask) != 'no':
            --qmask $qmask
        #end if

        ## NOTE(review): the test below sets sizeout to "--sizeout", so these
        ## macro-defined booleans presumably already expand to the flag itself,
        ## which would make the literal flag redundant. Confirm against
        ## vsearch_macros.xml before simplifying.
        #if $sizein:
            --sizein $sizein
        #end if
        #if $sizeout:
            --sizeout $sizeout
        #end if

        --strand $strand

        ## usersort is a boolean whose truevalue is the flag itself and whose
        ## falsevalue is empty, so it is emitted bare. Prefixing it with a literal
        ## "--usersort" would enable the option even when the box is unchecked.
        $usersort
]]>
    </command>
    <inputs>
        <param name="infile" type="data" format="fasta" label="Select your FASTA file" help="(--cluster_fast)" />
        <expand macro="id_and_iddef" />
        <param name="cons_truncate" type="boolean" truevalue="--cons_truncate" falsevalue="" checked="False"
            label="Do not ignore terminal gaps in MSA for consensus" help="(--cons_truncate)"/>
        <param name="usersort" type="boolean" truevalue="--usersort" falsevalue="" checked="False"
            label="Indicate that input sequences are presorted" help="(--usersort)"/>
        <expand macro="qmask" />
        <expand macro="sizein" />
        <expand macro="sizeout" />
        <expand macro="strand" />
        <expand macro="maxrejects" />
        <expand macro="maxaccepts" />
        <!-- Clustering-specific choices added to the shared output selector. -->
        <expand macro="general_output">
            <option value="--msaout">Multiple sequence alignments</option>
            <option value="--consout">Cluster consensus sequences</option>
            <option value="--centroids">Centroid sequences</option>
            <option value="--notmatched">Write non-matching query sequences to separate file</option>
            <option value="--matched">Write matching query sequences to separate file</option>
        </expand>
    </inputs>
    <outputs>
        <!-- Every dataset is filtered on the same flag that gates it in the command. -->
        <data name="msaout" format="fasta" label="${tool.name} on ${on_string}: Multiple Sequence Alignments">
            <filter>'--msaout' in outputs</filter>
        </data>
        <data name="consout" format="fasta" label="${tool.name} on ${on_string}: Consensus Sequences">
            <filter>'--consout' in outputs</filter>
        </data>
        <data name="centroids" format="fasta" label="${tool.name} on ${on_string}: Cluster centroids">
            <filter>'--centroids' in outputs</filter>
        </data>
        <!-- NOTE(review): the alignment report is declared as fasta here; confirm this is the intended datatype. -->
        <data name="alnout" format="fasta" label="${tool.name} on ${on_string}: Alignment">
            <filter>'--alnout' in outputs</filter>
        </data>
        <data name="notmatched" format="fasta" label="${tool.name} on ${on_string}: Non-matched queries">
            <filter>'--notmatched' in outputs</filter>
        </data>
        <data name="matched" format="fasta" label="${tool.name} on ${on_string}: Matching query sequences">
            <filter>'--matched' in outputs</filter>
        </data>
        <data name="blast6out" format="tabular" label="${tool.name} on ${on_string}: BLAST like tabular">
            <filter>'--blast6out' in outputs</filter>
        </data>
        <data name="fastapairs" format="fasta" label="${tool.name} on ${on_string}: Query/Target sequences">
            <filter>'--fastapairs' in outputs</filter>
        </data>
    </outputs>
    <tests>
        <test>
            <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" />
            <param name="id" value="0.99"/>
            <param name="maxaccepts" value="1"/>
            <param name="maxrejects" value="2"/>
            <param name="sizeout" value="--sizeout"/>
            <param name="outputs" value="--centroids,--alnout,--blast6out,--notmatched" />
            <output name="centroids" file="clustering_centroids_result1.fasta" ftype="fasta" />
            <output name="blast6out" file="clustering_blast6out_result1.tab" ftype="tabular" />
            <output name="notmatched" file="clustering_notmatched_result1.fasta" ftype="fasta" />
            <!-- The following result files would be too big -->
            <!--output name="matched" file="clustering_matched_result1.fasta" ftype="fasta" /-->
            <!--output name="alnout" file="clustering_alnout_result1.fasta" lines_diff="2" ftype="fasta" /-->
            <!--output name="fastapairs" file="clustering_fastapairs_result1.fasta" ftype="fasta" /-->
            <!--output name="msaout" file="clustering_msaout_result1.fasta" ftype="fasta" /-->
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

vsearch implements a single-pass, greedy star-clustering algorithm, similar to the algorithms
implemented in usearch, DNAclust and sumaclust for example.


Clustering options (most searching options also apply)
  --centroids FILENAME         output centroid sequences to FASTA file
  --cluster_fast FILENAME      cluster sequences fast
  --cluster_smallmem FILENAME  cluster sequences using a small amount of memory
  --clusters STRING            output each cluster to a separate FASTA file
  --consout FILENAME           output cluster consensus sequences to FASTA file
  --cons_truncate              do not ignore terminal gaps in MSA for consensus
  --id REAL                    reject if identity lower
  --iddef INT                  id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2)
  --msaout FILENAME            output multiple seq. alignments to FASTA file
  --qmask                      mask seqs with dust, soft or no method (dust)
  --sizein                     read abundance annotation from input
  --sizeout                    write cluster abundances to centroid file
  --strand                     cluster using "plus" or "both" strands (plus)
  --usersort                   indicate that input sequences are presorted


@EXTERNAL_DOCUMENTATION@

-------

@REFERENCES@


]]>
    </help>
    <expand macro="citations" />
</tool>