comparison clustering.xml @ 0:fae6527990af draft

Imported from capsule None
author iuc
date Thu, 21 May 2015 03:58:09 -0400
parents
children 8c4e2933a17a
comparison
equal deleted inserted replaced
-1:000000000000 0:fae6527990af
1 <tool id="vsearch_clustering" name="VSearch clustering" version="@VERSION@.0">
2 <description></description>
3 <macros>
4 <import>vsearch_macros.xml</import>
5 </macros> <!-- The macros expanded throughout this tool are loaded from vsearch_macros.xml. NOTE(review): the @VERSION@, @GENERAL@, @EXTERNAL_DOCUMENTATION@ and @REFERENCES@ tokens are presumably defined there as well; confirm. -->
6 <expand macro="requirements" />
7 <expand macro="stdio" />
8 <expand macro="version_command" />
9 <command>
10 <![CDATA[
11 vsearch
12 @GENERAL@
13 --cluster_fast "$infile"
14 ##--cluster_smallmem FILENAME cluster sequences using a small amount of memory
15 ##--clusters STRING output each cluster to a separate FASTA file
16 ## Runs vsearch --cluster_fast on the input FASTA; optional settings and output files are appended only when selected.
17 #if $maxrejects:
18 --maxrejects $maxrejects
19 #end if
20 #if $maxaccepts:
21 --maxaccepts $maxaccepts
22 #end if
23 ## cons_truncate is a boolean parameter that expands to its own flag or to nothing.
24 $cons_truncate
25 --id $id
26 ##--iddef $iddef
27 ## NOTE(review): --iddef stays disabled although the id_and_iddef macro is expanded in <inputs>; presumably the user's iddef choice is ignored. Confirm before enabling.
28 #if '--msaout' in str($outputs):
29 --msaout $msaout
30 #end if
31 #if '--consout' in str($outputs):
32 --consout $consout
33 #end if
34 #if '--centroids' in str($outputs):
35 --centroids $centroids
36 #end if
37 #if '--alnout' in str($outputs):
38 --alnout $alnout
39 #end if
40 #if '--blast6out' in str($outputs):
41 --blast6out $blast6out
42 #end if
43 #if '--notmatched' in str($outputs):
44 --notmatched $notmatched
45 #end if
46 #if '--fastapairs' in str($outputs):
47 --fastapairs $fastapairs
48 #end if
49 #if '--matched' in str($outputs):
50 --matched $matched
51 #end if
52 #if $qmask != 'no':
53 --qmask $qmask
54 #end if
55 #if $sizein:
56 $sizein
57 #end if
58 #if $sizeout:
59 $sizeout
60 #end if
61 --strand $strand
62 $usersort
63 ## sizein, sizeout and usersort are emitted bare because each value is the flag itself: usersort is declared that way in <inputs> and the test passes "--sizeout" for sizeout. Prefixing the flag again forced --usersort on even when unticked and doubled the others. NOTE(review): sizein is assumed to follow the same pattern; confirm in vsearch_macros.xml.
64 ]]>
65 </command>
66 <inputs>
67 <param name="infile" type="data" format="fasta" label="Select your FASTA file" help="(--cluster_fast)" />
68 <expand macro="id_and_iddef" />
69 <param name="cons_truncate" type="boolean" truevalue="--cons_truncate" falsevalue="" checked="False"
70 label="Do not ignore terminal gaps in MSA for consensus" help="(--cons_truncate)"/>
71 <param name="usersort" type="boolean" truevalue="--usersort" falsevalue="" checked="False"
72 label="Indicate that input sequences are presorted" help="(--usersort)"/>
73 <expand macro="qmask" />
74 <expand macro="sizein" />
75 <expand macro="sizeout" />
76 <expand macro="strand" />
77 <expand macro="maxrejects" />
78 <expand macro="maxaccepts" />
79 <expand macro="general_output">
80 <option value="--msaout">Multiple sequence alignments</option>
81 <option value="--consout">Cluster consensus sequences</option>
82 <option value="--centroids">Centroid sequences</option>
83 <option value="--notmatched">Write non-matching query sequences to separate file</option>
84 <option value="--matched">Write matching query sequences to separate file</option>
85 </expand>
86 <!-- NOTE(review): the alnout, blast6out and fastapairs choices used by the command, outputs and test sections are not listed above; presumably the general_output macro contributes them. Confirm in vsearch_macros.xml. -->
87 </inputs>
88 <outputs> <!-- Every dataset below is optional: its filter keeps it only when the matching flag was selected in the outputs parameter. -->
89 <data name="msaout" format="fasta" label="${tool.name} on ${on_string}: Multiple Sequence Alignments">
90 <filter>'--msaout' in outputs</filter>
91 </data>
92 <data name="consout" format="fasta" label="${tool.name} on ${on_string}: Consensus Sequences">
93 <filter>'--consout' in outputs</filter>
94 </data>
95 <data name="centroids" format="fasta" label="${tool.name} on ${on_string}: Cluster centroids">
96 <filter>'--centroids' in outputs</filter>
97 </data>
98 <data name="alnout" format="fasta" label="${tool.name} on ${on_string}: Alignment">
99 <filter>'--alnout' in outputs</filter>
100 </data> <!-- NOTE(review): alnout is declared as fasta; confirm that the file vsearch writes for this option is really FASTA. -->
101 <data name="notmatched" format="fasta" label="${tool.name} on ${on_string}: Non-matched queries">
102 <filter>'--notmatched' in outputs</filter>
103 </data>
104 <data name="matched" format="fasta" label="${tool.name} on ${on_string}: Matching query sequences">
105 <filter>'--matched' in outputs</filter>
106 </data>
107 <data name="blast6out" format="tabular" label="${tool.name} on ${on_string}: BLAST like tabular">
108 <filter>'--blast6out' in outputs</filter>
109 </data>
110 <data name="fastapairs" format="fasta" label="${tool.name} on ${on_string}: Query/Target sequences">
111 <filter>'--fastapairs' in outputs</filter>
112 </data>
113 </outputs>
114 <tests>
115 <test>
116 <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" />
117 <param name="id" value="0.99"/>
118 <param name="maxaccepts" value="1"/>
119 <param name="maxrejects" value="2"/>
120 <param name="sizeout" value="--sizeout"/>
121 <param name="outputs" value="--centroids,--alnout,--blast6out,--notmatched" />
122 <output name="centroids" file="clustering_centroids_result1.fasta" ftype="fasta" />
123 <output name="blast6out" file="clustering_blast6out_result1.tab" ftype="tabular" />
124 <output name="notmatched" file="clustering_notmatched_result1.fasta" ftype="fasta" />
125 <!-- The following result files would be too big, so their comparisons are disabled -->
126 <!--output name="matched" file="clustering_matched_result1.fasta" ftype="fasta" /-->
127 <!--output name="alnout" file="clustering_alnout_result1.fasta" lines_diff="2" ftype="fasta" /-->
128 <!--output name="fastapairs" file="clustering_fastapairs_result1.fasta" ftype="fasta" /-->
129 <!--output name="msaout" file="clustering_msaout_result1.fasta" ftype="fasta" /-->
130 </test>
131 </tests>
132 <help>
133 <![CDATA[
134 **What it does**
135
136 vsearch implements a single-pass, greedy star-clustering algorithm, similar to the algorithms
137 implemented in usearch, DNAclust and sumaclust for example.
138
139
140 Clustering options (most searching options also apply)
141 --centroids FILENAME output centroid sequences to FASTA file
142 --cluster_fast FILENAME cluster sequences fast
143 --cluster_smallmem FILENAME cluster sequences using a small amount of memory
144 --clusters STRING output each cluster to a separate FASTA file
145 --consout FILENAME output cluster consensus sequences to FASTA file
146 --cons_truncate do not ignore terminal gaps in MSA for consensus
147 --id REAL reject if identity lower
148 --iddef INT id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2)
149 --msaout FILENAME output multiple seq. alignments to FASTA file
150 --qmask mask seqs with dust, soft or no method (dust)
151 --sizein read abundance annotation from input
152 --sizeout write cluster abundances to centroid file
153 --strand cluster using "plus" or "both" strands (plus)
154 --usersort indicate that input sequences are presorted
155
156
157 @EXTERNAL_DOCUMENTATION@
158
159 -------
160
161 @REFERENCES@
162
163
164 ]]>
165 </help>
166 <expand macro="citations" />
167 </tool>