Mercurial > repos > iuc > vsearch
comparison clustering.xml @ 1:8c4e2933a17a draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsearch commit 95732e013ec4dfe5dae0b9ed81e9d7710cbaed9d
author | iuc |
---|---|
date | Wed, 26 Aug 2015 13:34:22 -0400 |
parents | fae6527990af |
children | f29e21388219 |
comparison
equal
deleted
inserted
replaced
0:fae6527990af | 1:8c4e2933a17a |
---|---|
1 <tool id="vsearch_clustering" name="VSearch clustering" version="@VERSION@.0"> | 1 <tool id="vsearch_clustering" name="VSearch clustering" version="@VERSION@.1"> |
2 <description></description> | 2 <description></description> |
3 <macros> | 3 <macros> |
4 <import>vsearch_macros.xml</import> | 4 <import>vsearch_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="requirements" /> | 6 <expand macro="requirements" /> |
8 <expand macro="version_command" /> | 8 <expand macro="version_command" /> |
9 <command> | 9 <command> |
10 <![CDATA[ | 10 <![CDATA[ |
11 vsearch | 11 vsearch |
12 @GENERAL@ | 12 @GENERAL@ |
13 --cluster_fast "$infile" | 13 |
14 ##--cluster_smallmem FILENAME cluster sequences using a small amount of memory | 14 #if $clustering_mode.clustering_mode_select == 'cluster_fast': |
15 --cluster_fast "$infile" | |
16 #else if $clustering_mode.clustering_mode_select == 'cluster_smallmem': | |
17 --cluster_smallmem "$infile" | |
18 #end if | |
15 ##--clusters STRING output each cluster to a separate FASTA file | 19 ##--clusters STRING output each cluster to a separate FASTA file |
16 | 20 |
17 #if $maxrejects: | 21 #if $maxrejects: |
18 --maxrejects $maxrejects | 22 --maxrejects $maxrejects |
19 #end if | 23 #end if |
21 --maxaccepts $maxaccepts | 25 --maxaccepts $maxaccepts |
22 #end if | 26 #end if |
23 | 27 |
24 $cons_truncate | 28 $cons_truncate |
25 --id $id | 29 --id $id |
26 ##--iddef $iddef | 30 --iddef $iddef |
27 | 31 |
28 #if '--msaout' in str($outputs): | 32 #if '--msaout' in str($outputs): |
29 --msaout $msaout | 33 --msaout $msaout |
30 #end if | 34 #end if |
31 #if '--consout' in str($outputs): | 35 #if '--consout' in str($outputs): |
50 --matched $matched | 54 --matched $matched |
51 #end if | 55 #end if |
52 #if $qmask != 'no': | 56 #if $qmask != 'no': |
53 --qmask $qmask | 57 --qmask $qmask |
54 #end if | 58 #end if |
55 #if $sizein: | 59 $sizein |
56 --sizein $sizein | 60 $sizeout |
57 #end if | |
58 #if $sizeout: | |
59 --sizeout $sizeout | |
60 #end if | |
61 --strand $strand | 61 --strand $strand |
62 --usersort $usersort | 62 $usersort |
63 #if $uc: | |
64 --uc "$uc_outfile" | |
65 #end if | |
63 | 66 |
64 ]]> | 67 ]]> |
65 </command> | 68 </command> |
66 <inputs> | 69 <inputs> |
67 <param name="infile" type="data" format="fasta" label="Select your FASTA file" help="(--cluster_fast)" /> | 70 <param name="infile" type="data" format="fasta" label="Select your input FASTA file" help="" /> |
71 <conditional name="clustering_mode"> | |
72 <param name="clustering_mode_select" type="select" label="Choose sorting method to use before clustering" help=""> | |
73 <option value="cluster_fast" default="True">Cluster sequences after sorting by length (--cluster_fast)</option> | |
74 <option value="cluster_smallmem">Cluster already sorted sequences (--cluster_smallmem)</option> | |
75 </param> | |
76 <when value="cluster_fast"> | |
77 </when> | |
78 <when value="cluster_smallmem"> | |
79 </when> | |
80 </conditional> | |
81 <param name="usersort" type="boolean" truevalue="--usersort" falsevalue="" checked="False" | |
82 label="Indicate that input sequences are not presorted by length" help="(--usersort)"/> | |
68 <expand macro="id_and_iddef" /> | 83 <expand macro="id_and_iddef" /> |
69 <param name="cons_truncate" type="boolean" truevalue="--cons_truncate" falsevalue="" checked="False" | 84 <param name="cons_truncate" type="boolean" truevalue="--cons_truncate" falsevalue="" checked="False" |
70 label="Do not ignore terminal gaps in MSA for consensus" help="(--cons_truncate)"/> | 85 label="Do not ignore terminal gaps in MSA for consensus" help="(--cons_truncate)"/> |
71 <param name="usersort" type="boolean" truevalue="--usersort" falsevalue="" checked="False" | |
72 label="Indicate that input sequences are presorted" help="(--usersort)"/> | |
73 <expand macro="qmask" /> | 86 <expand macro="qmask" /> |
74 <expand macro="sizein" /> | 87 <expand macro="sizein" /> |
75 <expand macro="sizeout" /> | 88 <expand macro="sizeout" /> |
76 <expand macro="strand" /> | 89 <expand macro="strand" /> |
77 <expand macro="maxrejects" /> | 90 <expand macro="maxrejects" /> |
81 <option value="--consout">Cluster consensus sequences</option> | 94 <option value="--consout">Cluster consensus sequences</option> |
82 <option value="--centroids">Centroid sequences</option> | 95 <option value="--centroids">Centroid sequences</option> |
83 <option value="--notmatched">Write non-matching query sequences to separate file</option> | 96 <option value="--notmatched">Write non-matching query sequences to separate file</option> |
84 <option value="--matched">Write matching query sequences to separate file</option> | 97 <option value="--matched">Write matching query sequences to separate file</option> |
85 </expand> | 98 </expand> |
99 <expand macro="uclust_like_output" /> | |
86 | 100 |
87 </inputs> | 101 </inputs> |
88 <outputs> | 102 <outputs> |
89 <data name="msaout" format="fasta" label="${tool.name} on ${on_string}: Multiple Sequence Alignments"> | 103 <data name="msaout" format="fasta" label="${tool.name} on ${on_string}: Multiple Sequence Alignments"> |
90 <filter>'--msaout' in outputs</filter> | 104 <filter>'--msaout' in outputs</filter> |
108 <filter>'--blast6out' in outputs</filter> | 122 <filter>'--blast6out' in outputs</filter> |
109 </data> | 123 </data> |
110 <data name="fastapairs" format="fasta" label="${tool.name} on ${on_string}: Query/Target sequences"> | 124 <data name="fastapairs" format="fasta" label="${tool.name} on ${on_string}: Query/Target sequences"> |
111 <filter>'--fastapairs' in outputs</filter> | 125 <filter>'--fastapairs' in outputs</filter> |
112 </data> | 126 </data> |
127 <data name="uc_outfile" format="tabular" label="${tool.name} on ${on_string}: UCLUST like output"> | |
128 <filter>uc is True</filter> | |
129 </data> | |
113 </outputs> | 130 </outputs> |
114 <tests> | 131 <tests> |
115 <test> | 132 <test> |
116 <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" /> | 133 <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" /> |
117 <param name="id" value="0.99"/> | 134 <param name="id" value="0.99"/> |
118 <param name="maxaccepts" value="1"/> | 135 <param name="maxaccepts" value="1"/> |
119 <param name="maxrejects" value="2"/> | 136 <param name="maxrejects" value="2"/> |
120 <param name="sizeout" value="--sizeout"/> | 137 <param name="sizeout" value=""/> |
121 <param name="outputs" value="--centroids,--alnout,--blast6out,--notmatched" /> | 138 <param name="outputs" value="--centroids,--alnout,--blast6out,--notmatched" /> |
122 <output name="centroids" file="clustering_centroids_result1.fasta" ftype="fasta" /> | 139 <output name="centroids" file="clustering_centroids_result1.fasta" ftype="fasta" /> |
123 <output name="blast6out" file="clustering_blast6out_result1.tab" ftype="tabular" /> | 140 <output name="blast6out" file="clustering_blast6out_result1.tab" ftype="tabular" /> |
124 <output name="notmatched" file="clustering_notmatched_result1.fasta" ftype="fasta" /> | 141 <output name="notmatched" file="clustering_notmatched_result1.fasta" ftype="fasta" /> |
125 <!-- The following result files would be too big --> | 142 <!-- The following result files would be too big --> |
126 <!--output name="matched" file="clustering_matched_result1.fasta" ftype="fasta" /--> | 143 <!--output name="matched" file="clustering_matched_result1.fasta" ftype="fasta" /--> |
127 <!--output name="alnout" file="clustering_alnout_result1.fasta" lines_diff="2" ftype="fasta" /--> | 144 <!--output name="alnout" file="clustering_alnout_result1.fasta" lines_diff="2" ftype="fasta" /--> |
128 <!--output name="fastapairs" file="clustering_fastapairs_result1.fasta" ftype="fasta" /--> | 145 <!--output name="fastapairs" file="clustering_fastapairs_result1.fasta" ftype="fasta" /--> |
129 <!--output name="msaout" file="clustering_msaout_result1.fasta" ftype="fasta" /--> | 146 <!--output name="msaout" file="clustering_msaout_result1.fasta" ftype="fasta" /--> |
130 </test> | 147 </test> |
148 <test> | |
149 <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" /> | |
150 <param name="clustering_mode_select" value="cluster_smallmem"/> | |
151 <param name="usersort" value="--usersort"/> | |
152 <param name="id" value="0.99"/> | |
153 <param name="maxaccepts" value="1"/> | |
154 <param name="maxrejects" value="2"/> | |
155 <param name="sizeout" value="--sizeout"/> | |
156 <param name="outputs" value="--centroids,--blast6out,--notmatched" /> | |
157 <output name="centroids" file="clustering_centroids_result2.fasta" ftype="fasta" /> | |
158 <output name="blast6out" file="clustering_blast6out_result2.tab" ftype="tabular" /> | |
159 <output name="notmatched" file="clustering_notmatched_result2.fasta" ftype="fasta" /> | |
160 </test> | |
161 <test> | |
162 <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" /> | |
163 <param name="clustering_mode_select" value="cluster_smallmem"/> | |
164 <param name="usersort" value="--usersort"/> | |
165 <param name="id" value="0.99"/> | |
166 <param name="maxaccepts" value="1"/> | |
167 <param name="maxrejects" value="2"/> | |
168 <param name="sizeout" value="--sizeout"/> | |
169 <param name="outputs" value="--centroids" /> | |
170 <param name="uc" value="--uc"/> | |
171 <output name="centroids" file="clustering_centroids_result2.fasta" ftype="fasta" /> | |
172 <output name="uc_outfile" file="clustering_uc_result3.uc" ftype="tabular" /> | |
173 </test> | |
174 <test> | |
175 <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" /> | |
176 <param name="clustering_mode_select" value="cluster_smallmem"/> | |
177 <param name="usersort" value="--usersort"/> | |
178 <param name="id" value="0.99"/> | |
179 <param name="maxaccepts" value="1"/> | |
180 <param name="maxrejects" value="2"/> | |
181 <param name="sizeout" value="--sizeout"/> | |
182 <param name="outputs" value="--centroids" /> | |
183 <param name="iddef" value="0"/> | |
184 <output name="centroids" file="clustering_centroids_result4.fasta" ftype="fasta" /> | |
185 </test> | |
186 <test> | |
187 <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" /> | |
188 <param name="clustering_mode_select" value="cluster_fast"/> | |
189 <param name="usersort" value="--usersort"/> | |
190 <param name="id" value="0.99"/> | |
191 <param name="maxaccepts" value="1"/> | |
192 <param name="maxrejects" value="2"/> | |
193 <param name="sizeout" value=""/> | |
194 <param name="outputs" value="--centroids" /> | |
195 <param name="qmask" value="none"/> | |
196 <output name="centroids" file="clustering_centroids_result1.fasta" ftype="fasta" /> | |
197 </test> | |
131 </tests> | 198 </tests> |
132 <help> | 199 <help> |
133 <![CDATA[ | 200 <![CDATA[ |
134 **What it does** | 201 **What it does** |
135 | 202 |
137 implemented in usearch, DNAclust and sumaclust for example. | 204 implemented in usearch, DNAclust and sumaclust for example. |
138 | 205 |
139 | 206 |
140 Clustering options (most searching options also apply) | 207 Clustering options (most searching options also apply) |
141 --centroids FILENAME output centroid sequences to FASTA file | 208 --centroids FILENAME output centroid sequences to FASTA file |
142 --cluster_fast FILENAME cluster sequences fast | 209 --cluster_fast FILENAME cluster sequences after sorting by length |
143 --cluster_smallmem FILENAME cluster sequences using a small amount of memory | 210 --cluster_size FILENAME cluster sequences after sorting by abundance |
211 --cluster_smallmem FILENAME cluster already sorted sequences (see --usersort) | |
144 --clusters STRING output each cluster to a separate FASTA file | 212 --clusters STRING output each cluster to a separate FASTA file |
145 --consout FILENAME output cluster consensus sequences to FASTA file | 213 --consout FILENAME output cluster consensus sequences to FASTA file |
146 --cons_truncate do not ignore terminal gaps in MSA for consensus | 214 --cons_truncate do not ignore terminal gaps in MSA for consensus |
147 --id REAL reject if identity lower | 215 --id REAL reject if identity lower |
148 --iddef INT id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2) | 216 --iddef INT id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2) |
149 --msaout FILENAME output multiple seq. alignments to FASTA file | 217 --msaout FILENAME output multiple seq. alignments to FASTA file |
150 --qmask mask seqs with dust, soft or no method (dust) | 218 --qmask mask seqs with dust, soft or no method (dust) |
151 --sizein read abundance annotation from input | 219 --sizein propagate abundance annotation from input |
152 --sizeout write cluster abundances to centroid file | 220 --sizeout write cluster abundances to centroid file |
153 --strand cluster using "plus" or "both" strands (plus) | 221 --strand cluster using plus or both strands (plus) |
154 --usersort indicate that input sequences are presorted | 222 --uc FILENAME filename for UCLUST-like output |
223 --usersort indicate sequences not presorted by length | |
155 | 224 |
156 | 225 |
157 @EXTERNAL_DOCUMENTATION@ | 226 @EXTERNAL_DOCUMENTATION@ |
158 | 227 |
159 ------- | 228 ------- |