comparison clustering.xml @ 1:8c4e2933a17a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsearch commit 95732e013ec4dfe5dae0b9ed81e9d7710cbaed9d
author iuc
date Wed, 26 Aug 2015 13:34:22 -0400
parents fae6527990af
children f29e21388219
comparison
equal deleted inserted replaced
0:fae6527990af 1:8c4e2933a17a
1 <tool id="vsearch_clustering" name="VSearch clustering" version="@VERSION@.0"> 1 <tool id="vsearch_clustering" name="VSearch clustering" version="@VERSION@.1">
2 <description></description> 2 <description></description>
3 <macros> 3 <macros>
4 <import>vsearch_macros.xml</import> 4 <import>vsearch_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements" /> 6 <expand macro="requirements" />
8 <expand macro="version_command" /> 8 <expand macro="version_command" />
9 <command> 9 <command>
10 <![CDATA[ 10 <![CDATA[
11 vsearch 11 vsearch
12 @GENERAL@ 12 @GENERAL@
13 --cluster_fast "$infile" 13
14 ##--cluster_smallmem FILENAME cluster sequences using a small amount of memory 14 #if $clustering_mode.clustering_mode_select == 'cluster_fast':
15 --cluster_fast "$infile"
16 #else if $clustering_mode.clustering_mode_select == 'cluster_smallmem':
17 --cluster_smallmem "$infile"
18 #end if
15 ##--clusters STRING output each cluster to a separate FASTA file 19 ##--clusters STRING output each cluster to a separate FASTA file
16 20
17 #if $maxrejects: 21 #if $maxrejects:
18 --maxrejects $maxrejects 22 --maxrejects $maxrejects
19 #end if 23 #end if
21 --maxaccepts $maxaccepts 25 --maxaccepts $maxaccepts
22 #end if 26 #end if
23 27
24 $cons_truncate 28 $cons_truncate
25 --id $id 29 --id $id
26 ##--iddef $iddef 30 --iddef $iddef
27 31
28 #if '--msaout' in str($outputs): 32 #if '--msaout' in str($outputs):
29 --msaout $msaout 33 --msaout $msaout
30 #end if 34 #end if
31 #if '--consout' in str($outputs): 35 #if '--consout' in str($outputs):
50 --matched $matched 54 --matched $matched
51 #end if 55 #end if
52 #if $qmask != 'no': 56 #if $qmask != 'no':
53 --qmask $qmask 57 --qmask $qmask
54 #end if 58 #end if
55 #if $sizein: 59 $sizein
56 --sizein $sizein 60 $sizeout
57 #end if
58 #if $sizeout:
59 --sizeout $sizeout
60 #end if
61 --strand $strand 61 --strand $strand
62 --usersort $usersort 62 $usersort
63 #if $uc:
64 --uc "$uc_outfile"
65 #end if
63 66
64 ]]> 67 ]]>
65 </command> 68 </command>
66 <inputs> 69 <inputs>
67 <param name="infile" type="data" format="fasta" label="Select your FASTA file" help="(--cluster_fast)" /> 70 <param name="infile" type="data" format="fasta" label="Select your input FASTA file" help="" />
71 <conditional name="clustering_mode">
72 <param name="clustering_mode_select" type="select" label="Choose sorting method to use before clustering" help="">
73 <option value="cluster_fast" default="True">Cluster sequences after sorting by length (--cluster_fast)</option>
74 <option value="cluster_smallmem">Cluster already sorted sequences (--cluster_smallmem)</option>
75 </param>
76 <when value="cluster_fast">
77 </when>
78 <when value="cluster_smallmem">
79 </when>
80 </conditional>
81 <param name="usersort" type="boolean" truevalue="--usersort" falsevalue="" checked="False"
82 label="Indicate that input sequences are not presorted by length" help="(--usersort)"/>
68 <expand macro="id_and_iddef" /> 83 <expand macro="id_and_iddef" />
69 <param name="cons_truncate" type="boolean" truevalue="--cons_truncate" falsevalue="" checked="False" 84 <param name="cons_truncate" type="boolean" truevalue="--cons_truncate" falsevalue="" checked="False"
70 label="Do not ignore terminal gaps in MSA for consensus" help="(--cons_truncate)"/> 85 label="Do not ignore terminal gaps in MSA for consensus" help="(--cons_truncate)"/>
71 <param name="usersort" type="boolean" truevalue="--usersort" falsevalue="" checked="False"
72 label="Indicate that input sequences are presorted" help="(--usersort)"/>
73 <expand macro="qmask" /> 86 <expand macro="qmask" />
74 <expand macro="sizein" /> 87 <expand macro="sizein" />
75 <expand macro="sizeout" /> 88 <expand macro="sizeout" />
76 <expand macro="strand" /> 89 <expand macro="strand" />
77 <expand macro="maxrejects" /> 90 <expand macro="maxrejects" />
81 <option value="--consout">Cluster consensus sequences</option> 94 <option value="--consout">Cluster consensus sequences</option>
82 <option value="--centroids">Centroid sequences</option> 95 <option value="--centroids">Centroid sequences</option>
83 <option value="--notmatched">Write non-matching query sequences to separate file</option> 96 <option value="--notmatched">Write non-matching query sequences to separate file</option>
84 <option value="--matched">Write matching query sequences to separate file</option> 97 <option value="--matched">Write matching query sequences to separate file</option>
85 </expand> 98 </expand>
99 <expand macro="uclust_like_output" />
86 100
87 </inputs> 101 </inputs>
88 <outputs> 102 <outputs>
89 <data name="msaout" format="fasta" label="${tool.name} on ${on_string}: Multiple Sequence Alignments"> 103 <data name="msaout" format="fasta" label="${tool.name} on ${on_string}: Multiple Sequence Alignments">
90 <filter>'--msaout' in outputs</filter> 104 <filter>'--msaout' in outputs</filter>
108 <filter>'--blast6out' in outputs</filter> 122 <filter>'--blast6out' in outputs</filter>
109 </data> 123 </data>
110 <data name="fastapairs" format="fasta" label="${tool.name} on ${on_string}: Query/Target sequences"> 124 <data name="fastapairs" format="fasta" label="${tool.name} on ${on_string}: Query/Target sequences">
111 <filter>'--fastapairs' in outputs</filter> 125 <filter>'--fastapairs' in outputs</filter>
112 </data> 126 </data>
127 <data name="uc_outfile" format="tabular" label="${tool.name} on ${on_string}: UCLUST like output">
128 <filter>uc is True</filter>
129 </data>
113 </outputs> 130 </outputs>
114 <tests> 131 <tests>
115 <test> 132 <test>
116 <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" /> 133 <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" />
117 <param name="id" value="0.99"/> 134 <param name="id" value="0.99"/>
118 <param name="maxaccepts" value="1"/> 135 <param name="maxaccepts" value="1"/>
119 <param name="maxrejects" value="2"/> 136 <param name="maxrejects" value="2"/>
120 <param name="sizeout" value="--sizeout"/> 137 <param name="sizeout" value=""/>
121 <param name="outputs" value="--centroids,--alnout,--blast6out,--notmatched" /> 138 <param name="outputs" value="--centroids,--alnout,--blast6out,--notmatched" />
122 <output name="centroids" file="clustering_centroids_result1.fasta" ftype="fasta" /> 139 <output name="centroids" file="clustering_centroids_result1.fasta" ftype="fasta" />
123 <output name="blast6out" file="clustering_blast6out_result1.tab" ftype="tabular" /> 140 <output name="blast6out" file="clustering_blast6out_result1.tab" ftype="tabular" />
124 <output name="notmatched" file="clustering_notmatched_result1.fasta" ftype="fasta" /> 141 <output name="notmatched" file="clustering_notmatched_result1.fasta" ftype="fasta" />
125 <!-- The following result files would be too big --> 142 <!-- The following result files would be too big -->
126 <!--output name="matched" file="clustering_matched_result1.fasta" ftype="fasta" /--> 143 <!--output name="matched" file="clustering_matched_result1.fasta" ftype="fasta" /-->
127 <!--output name="alnout" file="clustering_alnout_result1.fasta" lines_diff="2" ftype="fasta" /--> 144 <!--output name="alnout" file="clustering_alnout_result1.fasta" lines_diff="2" ftype="fasta" /-->
128 <!--output name="fastapairs" file="clustering_fastapairs_result1.fasta" ftype="fasta" /--> 145 <!--output name="fastapairs" file="clustering_fastapairs_result1.fasta" ftype="fasta" /-->
129 <!--output name="msaout" file="clustering_msaout_result1.fasta" ftype="fasta" /--> 146 <!--output name="msaout" file="clustering_msaout_result1.fasta" ftype="fasta" /-->
130 </test> 147 </test>
148 <test>
149 <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" />
150 <param name="clustering_mode_select" value="cluster_smallmem"/>
151 <param name="usersort" value="--usersort"/>
152 <param name="id" value="0.99"/>
153 <param name="maxaccepts" value="1"/>
154 <param name="maxrejects" value="2"/>
155 <param name="sizeout" value="--sizeout"/>
156 <param name="outputs" value="--centroids,--blast6out,--notmatched" />
157 <output name="centroids" file="clustering_centroids_result2.fasta" ftype="fasta" />
158 <output name="blast6out" file="clustering_blast6out_result2.tab" ftype="tabular" />
159 <output name="notmatched" file="clustering_notmatched_result2.fasta" ftype="fasta" />
160 </test>
161 <test>
162 <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" />
163 <param name="clustering_mode_select" value="cluster_smallmem"/>
164 <param name="usersort" value="--usersort"/>
165 <param name="id" value="0.99"/>
166 <param name="maxaccepts" value="1"/>
167 <param name="maxrejects" value="2"/>
168 <param name="sizeout" value="--sizeout"/>
169 <param name="outputs" value="--centroids" />
170 <param name="uc" value="--uc"/>
171 <output name="centroids" file="clustering_centroids_result2.fasta" ftype="fasta" />
172 <output name="uc_outfile" file="clustering_uc_result3.uc" ftype="tabular" />
173 </test>
174 <test>
175 <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" />
176 <param name="clustering_mode_select" value="cluster_smallmem"/>
177 <param name="usersort" value="--usersort"/>
178 <param name="id" value="0.99"/>
179 <param name="maxaccepts" value="1"/>
180 <param name="maxrejects" value="2"/>
181 <param name="sizeout" value="--sizeout"/>
182 <param name="outputs" value="--centroids" />
183 <param name="iddef" value="0"/>
184 <output name="centroids" file="clustering_centroids_result4.fasta" ftype="fasta" />
185 </test>
186 <test>
187 <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" />
188 <param name="clustering_mode_select" value="cluster_fast"/>
189 <param name="usersort" value="--usersort"/>
190 <param name="id" value="0.99"/>
191 <param name="maxaccepts" value="1"/>
192 <param name="maxrejects" value="2"/>
193 <param name="sizeout" value=""/>
194 <param name="outputs" value="--centroids" />
195 <param name="qmask" value="none"/>
196 <output name="centroids" file="clustering_centroids_result1.fasta" ftype="fasta" />
197 </test>
131 </tests> 198 </tests>
132 <help> 199 <help>
133 <![CDATA[ 200 <![CDATA[
134 **What it does** 201 **What it does**
135 202
137 implemented in usearch, DNAclust and sumaclust for example. 204 implemented in usearch, DNAclust and sumaclust for example.
138 205
139 206
140 Clustering options (most searching options also apply) 207 Clustering options (most searching options also apply)
141 --centroids FILENAME output centroid sequences to FASTA file 208 --centroids FILENAME output centroid sequences to FASTA file
142 --cluster_fast FILENAME cluster sequences fast 209 --cluster_fast FILENAME cluster sequences after sorting by length
143 --cluster_smallmem FILENAME cluster sequences using a small amount of memory 210 --cluster_size FILENAME cluster sequences after sorting by abundance
211 --cluster_smallmem FILENAME cluster already sorted sequences (see --usersort)
144 --clusters STRING output each cluster to a separate FASTA file 212 --clusters STRING output each cluster to a separate FASTA file
145 --consout FILENAME output cluster consensus sequences to FASTA file 213 --consout FILENAME output cluster consensus sequences to FASTA file
146 --cons_truncate do not ignore terminal gaps in MSA for consensus 214 --cons_truncate do not ignore terminal gaps in MSA for consensus
147 --id REAL reject if identity lower 215 --id REAL reject if identity lower
148 --iddef INT id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2) 216 --iddef INT id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2)
149 --msaout FILENAME output multiple seq. alignments to FASTA file 217 --msaout FILENAME output multiple seq. alignments to FASTA file
150 --qmask mask seqs with dust, soft or no method (dust) 218 --qmask mask seqs with dust, soft or no method (dust)
151 --sizein read abundance annotation from input 219 --sizein propagate abundance annotation from input
152 --sizeout write cluster abundances to centroid file 220 --sizeout write cluster abundances to centroid file
153 --strand cluster using "plus" or "both" strands (plus) 221 --strand cluster using plus or both strands (plus)
154 --usersort indicate that input sequences are presorted 222 --uc FILENAME filename for UCLUST-like output
223 --usersort indicate sequences not presorted by length
155 224
156 225
157 @EXTERNAL_DOCUMENTATION@ 226 @EXTERNAL_DOCUMENTATION@
158 227
159 ------- 228 -------