Mercurial > repos > iuc > vsearch
comparison clustering.xml @ 0:fae6527990af draft
Imported from capsule None
author | iuc |
---|---|
date | Thu, 21 May 2015 03:58:09 -0400 |
parents | |
children | 8c4e2933a17a |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:fae6527990af |
---|---|
1 <tool id="vsearch_clustering" name="VSearch clustering" version="@VERSION@.0"> | |
2 <description></description> | |
3 <macros> <!-- Shared definitions are imported from vsearch_macros.xml (not shown here); the expand elements and @TOKENS@ used in this file presumably resolve against it. --> | |
4 <import>vsearch_macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements" /> | |
7 <expand macro="stdio" /> | |
8 <expand macro="version_command" /> | |
9 <command> | |
10 <![CDATA[ | |
11 vsearch | |
12 @GENERAL@ | |
13 --cluster_fast "$infile" | |
14 ##--cluster_smallmem FILENAME cluster sequences using a small amount of memory | |
15 ##--clusters STRING output each cluster to a separate FASTA file | |
16 ## Assembles one "vsearch --cluster_fast" invocation; most flags below are conditional. | |
17 #if $maxrejects: | |
18 --maxrejects $maxrejects | |
19 #end if | |
20 #if $maxaccepts: | |
21 --maxaccepts $maxaccepts | |
22 #end if | |
23 ## cons_truncate is a boolean param: it expands to "--cons_truncate" or to nothing. | |
24 $cons_truncate | |
25 --id $id | |
26 ##--iddef $iddef | |
27 ## Each optional output file is requested only if its flag is present in $outputs. | |
28 #if '--msaout' in str($outputs): | |
29 --msaout $msaout | |
30 #end if | |
31 #if '--consout' in str($outputs): | |
32 --consout $consout | |
33 #end if | |
34 #if '--centroids' in str($outputs): | |
35 --centroids $centroids | |
36 #end if | |
37 #if '--alnout' in str($outputs): | |
38 --alnout $alnout | |
39 #end if | |
40 #if '--blast6out' in str($outputs): | |
41 --blast6out $blast6out | |
42 #end if | |
43 #if '--notmatched' in str($outputs): | |
44 --notmatched $notmatched | |
45 #end if | |
46 #if '--fastapairs' in str($outputs): | |
47 --fastapairs $fastapairs | |
48 #end if | |
49 #if '--matched' in str($outputs): | |
50 --matched $matched | |
51 #end if | |
52 #if $qmask != 'no': | |
53 --qmask $qmask | |
54 #end if | |
55 ## sizein/sizeout/usersort are emitted bare: a boolean param already expands to its | |
56 ## flag (truevalue) or to "", so prefixing it with the flag duplicated the option and, | |
57 ## for usersort, passed the flag even when unchecked. NOTE(review): sizein/sizeout | |
58 ## come from vsearch_macros.xml; assumed to use the same truevalue convention - confirm. | |
59 $sizein | |
60 $sizeout | |
61 --strand $strand | |
62 $usersort | |
63 | |
64 ]]> | |
65 </command> | |
66 <inputs> <!-- Tool form parameters: the param elements are declared here, the expand elements refer to macros defined outside this file. --> | |
67 <param name="infile" type="data" format="fasta" label="Select your FASTA file" help="(--cluster_fast)" /> | |
68 <expand macro="id_and_iddef" /> | |
69 <param name="cons_truncate" type="boolean" truevalue="--cons_truncate" falsevalue="" checked="False" | |
70 label="Do not ignore terminal gaps in MSA for consensus" help="(--cons_truncate)"/> | |
71 <param name="usersort" type="boolean" truevalue="--usersort" falsevalue="" checked="False" | |
72 label="Indicate that input sequences are presorted" help="(--usersort)"/> | |
73 <expand macro="qmask" /> | |
74 <expand macro="sizein" /> | |
75 <expand macro="sizeout" /> | |
76 <expand macro="strand" /> | |
77 <expand macro="maxrejects" /> | |
78 <expand macro="maxaccepts" /> | |
79 <expand macro="general_output"> | |
80 <option value="--msaout">Multiple sequence alignments</option> | |
81 <option value="--consout">Cluster consensus sequences</option> | |
82 <option value="--centroids">Centroid sequences</option> | |
83 <option value="--notmatched">Write non-matching query sequences to separate file</option> | |
84 <option value="--matched">Write matching query sequences to separate file</option> | |
85 </expand> | |
86 <!-- NOTE(review): the command and outputs sections also test for alnout, blast6out and fastapairs, which are not listed above; presumably the general_output macro supplies those options. Confirm in vsearch_macros.xml. --> | |
87 </inputs> | |
88 <outputs> <!-- One dataset per optional result file; each filter keeps its dataset only when the matching flag is present in the "outputs" parameter. --> | |
89 <data name="msaout" format="fasta" label="${tool.name} on ${on_string}: Multiple Sequence Alignments"> | |
90 <filter>'--msaout' in outputs</filter> | |
91 </data> | |
92 <data name="consout" format="fasta" label="${tool.name} on ${on_string}: Consensus Sequences"> | |
93 <filter>'--consout' in outputs</filter> | |
94 </data> | |
95 <data name="centroids" format="fasta" label="${tool.name} on ${on_string}: Cluster centroids"> | |
96 <filter>'--centroids' in outputs</filter> | |
97 </data> | |
98 <data name="alnout" format="txt" label="${tool.name} on ${on_string}: Alignment"> <!-- alnout is a human-readable pairwise alignment report, not FASTA, so it is typed as plain text. --> | |
99 <filter>'--alnout' in outputs</filter> | |
100 </data> | |
101 <data name="notmatched" format="fasta" label="${tool.name} on ${on_string}: Non-matched queries"> | |
102 <filter>'--notmatched' in outputs</filter> | |
103 </data> | |
104 <data name="matched" format="fasta" label="${tool.name} on ${on_string}: Matching query sequences"> | |
105 <filter>'--matched' in outputs</filter> | |
106 </data> | |
107 <data name="blast6out" format="tabular" label="${tool.name} on ${on_string}: BLAST like tabular"> | |
108 <filter>'--blast6out' in outputs</filter> | |
109 </data> | |
110 <data name="fastapairs" format="fasta" label="${tool.name} on ${on_string}: Query/Target sequences"> | |
111 <filter>'--fastapairs' in outputs</filter> | |
112 </data> | |
113 </outputs> | |
114 <tests> | |
115 <test> <!-- Clusters BioMarKs5k.fsa.bz2 at identity 0.99 and compares the centroids, blast6out and notmatched results against stored files. --> | |
116 <param name="infile" value="BioMarKs5k.fsa.bz2" ftype="fasta" /> | |
117 <param name="id" value="0.99"/> | |
118 <param name="maxaccepts" value="1"/> | |
119 <param name="maxrejects" value="2"/> | |
120 <param name="sizeout" value="--sizeout"/> | |
121 <param name="outputs" value="--centroids,--alnout,--blast6out,--notmatched" /> | |
122 <output name="centroids" file="clustering_centroids_result1.fasta" ftype="fasta" /> | |
123 <output name="blast6out" file="clustering_blast6out_result1.tab" ftype="tabular" /> | |
124 <output name="notmatched" file="clustering_notmatched_result1.fasta" ftype="fasta" /> | |
125 <!-- The following result files would be too big to store as expected outputs. Of these, only alnout is requested by the "outputs" value above. --> | |
126 <!--output name="matched" file="clustering_matched_result1.fasta" ftype="fasta" /--> | |
127 <!--output name="alnout" file="clustering_alnout_result1.fasta" lines_diff="2" ftype="fasta" /--> | |
128 <!--output name="fastapairs" file="clustering_fastapairs_result1.fasta" ftype="fasta" /--> | |
129 <!--output name="msaout" file="clustering_msaout_result1.fasta" ftype="fasta" /--> | |
130 </test> | |
131 </tests> | |
132 <help> | |
133 <![CDATA[ | |
134 **What it does** | |
135 | |
136 vsearch implements a single-pass, greedy star-clustering algorithm, similar to the algorithms | |
137 implemented in usearch, DNAclust and sumaclust for example. | |
138 | |
139 | |
140 Clustering options (most searching options also apply) | |
141 --centroids FILENAME output centroid sequences to FASTA file | |
142 --cluster_fast FILENAME cluster sequences fast | |
143 --cluster_smallmem FILENAME cluster sequences using a small amount of memory | |
144 --clusters STRING output each cluster to a separate FASTA file | |
145 --consout FILENAME output cluster consensus sequences to FASTA file | |
146 --cons_truncate do not ignore terminal gaps in MSA for consensus | |
147 --id REAL reject if identity lower | |
148 --iddef INT id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2) | |
149 --msaout FILENAME output multiple seq. alignments to FASTA file | |
150 --qmask mask seqs with dust, soft or no method (dust) | |
151 --sizein read abundance annotation from input | |
152 --sizeout write cluster abundances to centroid file | |
153 --strand cluster using "plus" or "both" strands (plus) | |
154 --usersort indicate that input sequences are presorted | |
155 | |
156 | |
157 @EXTERNAL_DOCUMENTATION@ | |
158 | |
159 ------- | |
160 | |
161 @REFERENCES@ | |
162 | |
163 | |
164 ]]> | |
165 </help> | |
166 <expand macro="citations" /> | |
167 </tool> |