comparison gubbins.xml @ 0:637ec5d5368c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gubbins commit 98be6edc2544513347149b50e0cfb530811f890a
author iuc
date Fri, 23 Jun 2017 08:29:43 -0400
parents
children 96e6283e4745
comparison
equal deleted inserted replaced
-1:000000000000 0:637ec5d5368c
1 <tool id="gubbins" name="Gubbins" version="0.1.0">
2 <description>Recombination detection in Bacteria</description>
3 <requirements>
4 <requirement type="package" version="2.2.1">gubbins</requirement>
5 </requirements>
6 <!-- Command used to report the version of the wrapped run_gubbins.py executable. -->
7 <version_command>run_gubbins.py --version</version_command>
8
9 <command detect_errors="exit_code"><![CDATA[
10 ## Stage the input under a fixed name; the outputs are collected from the working directory as foo.* files.
11 ln -s '$alignment_file' foo.aln &&
12
13 run_gubbins.py
14
15 --threads \${GALAXY_SLOTS:-1}
16 ## Integer options are compared with the empty string instead of being tested for truthiness, so an explicit 0 is passed on rather than silently replaced by the Gubbins default.
17 #if str($adv.iters) != ''
18 -i '$adv.iters'
19 #end if
20
21 #if $adv.conv_meth
22 -z '$adv.conv_meth'
23 #end if
24 ## The outgroup is free text; it is only passed when the user filled it in.
25 #if $adv.outgroup
26 -o '$adv.outgroup'
27 #end if
28
29 #if $really_adv.tree_builder
30 -t '$really_adv.tree_builder'
31 #end if
32
33 #if str($really_adv.min_snps) != ''
34 -m '$really_adv.min_snps'
35 #end if
36
37 #if str($really_adv.filter_percentage) != ''
38 -f '$really_adv.filter_percentage'
39 #end if
40
41 #if str($really_adv.min_window_size) != ''
42 -a '$really_adv.min_window_size'
43 #end if
44
45 #if str($really_adv.max_window_size) != ''
46 -b '$really_adv.max_window_size'
47 #end if
48 ## Boolean parameter: expands to -d when ticked and to nothing otherwise.
49 $really_adv.remove_duplicates
50
51 foo.aln
52
53 ]]></command>
54 <inputs>
55 <!-- The whole genome alignment to analyse; the command links it into the working directory as foo.aln. -->
56 <param type="data" name="alignment_file" format="fasta" label="Whole genome alignment file" help="Whole genome alignment file in fasta format"/>
57
58 <!-- Output file picker: each value is referenced by the filter of the matching dataset in the outputs section. -->
59 <param type="select" name="outfiles" multiple="true" display="checkboxes" label="Select the required output files" help="Default selections are the Final Tree in Newick format, the Recombination Predictions in gff3 format and the Summary of SNP Distribution">
60 <option value="ftree" selected="true">Final Tree in newick format</option>
61 <option value="gff" selected="true">Recombination Predictions in gff3 format</option>
62 <option value="vcf" selected="true">Summary of SNP Distribution in vcf format</option>
63 <option value="recomb_embl">Recombination Predictions in embl format</option>
64 <option value="fpoly">Filtered Polymorphic Sites in fasta format</option>
65 <option value="ppoly">Filtered Polymorphic Sites in phylip format</option>
66 <option value="stats">Per Branch Statistics in csv format</option>
67 <option value="baseb">Base Branch Reconstruction in embl format</option>
68 </param>
69
70 <!-- Semi-advanced section (-i, -z, -o); the defaults rarely need changing. -->
71 <section name="adv" title="Advanced options" expanded="true">
72 <param name="iters" type="integer" label="Iterations" value="5" help="Maximum No. of iterations, default is 5" />
73 <param name="conv_meth" type="select" label="Convergence method" help="Criteria to use to know when to halt iterations">
74 <option value="weighted_robinson_foulds" selected="true">Weighted Robinson Foulds</option>
75 <option value="robinson_foulds">Robinson Foulds</option>
76 <option value="recombination">Recombination</option>
77 </param>
78 <param name="outgroup" type="text" label="Outgroup" value="" help="Outgroup name for rerooting. A list of comma separated names can be used if they form a clade."/>
79 </section>
80
81 <!-- Really advanced section (-t, -m, -f, -a, -b, -d); collapsed by default. filter_percentage is a percentage, hence the 0-100 range. -->
82 <section name="really_adv" title="Really advanced options - change these if you really know what you are doing." expanded="false">
83 <param name="tree_builder" type="select" label="Tree builder" help="Application to use for tree building, default RAxML">
84 <option value="raxml" selected="true">RAxML</option>
85 <option value="fasttree">FastTree</option>
86 <option value="hybrid">Hybrid</option>
87 </param>
88 <param name="min_snps" type="integer" label="Minimum SNPS" value="3" help="Min SNPs to identify a recombination block, default is 3" />
89 <param name="filter_percentage" type="integer" label="Filter Percentage" value="25" min="0" max="100" help="Filter out taxa with more than this percentage of gaps, default is 25" />
90 <param name="min_window_size" type="integer" label="Minimum Window Size" value="100" help="Minimum window size, default 100" />
91 <param name="max_window_size" type="integer" label="Maximum Window Size" value="10000" help="Maximum window size, default 10000" />
92 <param name="remove_duplicates" type="boolean" label="Remove identical sequences" falsevalue="" truevalue="-d" />
93 </section>
94
95 </inputs>
96
97 <outputs> <!-- One dataset per Gubbins result file; each is created only when its key is ticked in the outfiles checkbox list. -->
98 <data name="final_tree" format="txt" from_work_dir="foo.final_tree.tre" label="${tool.name} on ${on_string} Final Tree">
99 <filter>outfiles and 'ftree' in outfiles</filter>
100 </data>
101 <data name="recomb_pred_gff" format="gff3" from_work_dir="foo.recombination_predictions.gff" label="${tool.name} on ${on_string} Recombinations Prediction gff">
102 <filter>outfiles and 'gff' in outfiles</filter>
103 </data>
104 <data name="recomb_pred_embl" format="embl" from_work_dir="foo.recombination_predictions.embl" label="${tool.name} on ${on_string} Recombinations Prediction embl">
105 <filter>outfiles and 'recomb_embl' in outfiles</filter>
106 </data>
107 <data name="filt_polymorph_fna" format="fasta" from_work_dir="foo.filtered_polymorphic_sites.fasta" label="${tool.name} on ${on_string} Filtered Polymorphic Sites fasta">
108 <filter>outfiles and 'fpoly' in outfiles</filter>
109 </data>
110 <data name="filt_polymorph_phy" format="phylip" from_work_dir="foo.filtered_polymorphic_sites.phylip" label="${tool.name} on ${on_string} Filtered Polymorphic Sites phylip">
111 <filter>outfiles and 'ppoly' in outfiles</filter>
112 </data>
113 <data name="per_b_stat_csv" format="csv" from_work_dir="foo.per_branch_statistics.csv" label="${tool.name} on ${on_string} Per Branch Statistics csv">
114 <filter>outfiles and 'stats' in outfiles</filter>
115 </data>
116 <data name="sum_snp_vcf" format="vcf" from_work_dir="foo.summary_of_snp_distribution.vcf" label="${tool.name} on ${on_string} Summary of SNP Distribution vcf">
117 <filter>outfiles and 'vcf' in outfiles</filter>
118 </data>
119 <data name="base_branch_embl" format="embl" from_work_dir="foo.branch_base_reconstruction.embl" label="${tool.name} on ${on_string} Branch Base Reconstruction embl">
120 <filter>outfiles and 'baseb' in outfiles</filter>
121 </data>
122 </outputs>
123
124
125 <tests> <!-- Functional tests; every test runs on multiple_recombinations.aln. -->
126 <test> <!-- Default parameters: checks the header line of the GFF3 and VCF outputs. -->
127 <param name="alignment_file" value="multiple_recombinations.aln" ftype="fasta" />
128 <output name="recomb_pred_gff">
129 <assert_contents>
130 <has_text text="##gff-version 3" />
131 </assert_contents>
132 </output>
133 <output name="sum_snp_vcf">
134 <assert_contents>
135 <has_text text="##fileformat=VCFv4.2" />
136 </assert_contents>
137 </output>
138 </test>
139 <!-- Same checks with the maximum number of iterations set to 3. -->
140 <test>
141 <param name="alignment_file" value="multiple_recombinations.aln" ftype="fasta" />
142 <param name="iters" value="3"/>
143 <output name="recomb_pred_gff">
144 <assert_contents>
145 <has_text text="##gff-version 3" />
146 </assert_contents>
147 </output>
148 <output name="sum_snp_vcf">
149 <assert_contents>
150 <has_text text="##fileformat=VCFv4.2" />
151 </assert_contents>
152 </output>
153 </test>
154 <!-- Same checks with "recombination" selected as the convergence criterion. -->
155 <test>
156 <param name="alignment_file" value="multiple_recombinations.aln" ftype="fasta" />
157 <param name="conv_meth" value="recombination" />
158 <output name="recomb_pred_gff">
159 <assert_contents>
160 <has_text text="##gff-version 3" />
161 </assert_contents>
162 </output>
163 <output name="sum_snp_vcf">
164 <assert_contents>
165 <has_text text="##fileformat=VCFv4.2" />
166 </assert_contents>
167 </output>
168 </test>
169 <!-- Requests all eight output files and spot-checks the content of each one. -->
170 <test>
171 <param name="alignment_file" value="multiple_recombinations.aln" ftype="fasta" />
172 <param name="conv_meth" value="recombination" />
173 <param name="outfiles" value="gff,vcf,ftree,recomb_embl,fpoly,ppoly,stats,baseb"/>
174 <output name="recomb_pred_gff">
175 <assert_contents>
176 <has_text text="##gff-version 3" />
177 </assert_contents>
178 </output>
179 <output name="sum_snp_vcf">
180 <assert_contents>
181 <has_text text="##fileformat=VCFv4.2" />
182 </assert_contents>
183 </output>
184 <output name="filt_polymorph_fna">
185 <assert_contents>
186 <has_text text=">sequence_1" />
187 </assert_contents>
188 </output>
189 <output name="recomb_pred_embl">
190 <assert_contents>
191 <has_text text="FT" />
192 </assert_contents>
193 </output>
194 <output name="filt_polymorph_phy">
195 <assert_contents>
196 <has_text text="sequence_1" />
197 </assert_contents>
198 </output>
199 <output name="per_b_stat_csv">
200 <assert_contents>
201 <has_text text="Node" />
202 </assert_contents>
203 </output>
204 <output name="base_branch_embl">
205 <assert_contents>
206 <has_text text="FT" />
207 </assert_contents>
208 </output>
209 <output name="final_tree">
210 <assert_contents>
211 <has_text text="((sequence_" />
212 </assert_contents>
213 </output>
214 </test>
215 </tests>
216
217
218 <help><![CDATA[
219 **Gubbins**
220
221 Since the introduction of high-throughput, second-generation DNA sequencing technologies, there has been an enormous increase in the size of datasets being used for estimating bacterial population phylodynamics. Although many phylogenetic techniques are scalable to hundreds of bacterial genomes, methods which have been used for mitigating the effect of mechanisms of horizontal sequence transfer on phylogenetic reconstructions cannot cope with these new datasets. Gubbins (Genealogies Unbiased By recomBinations In Nucleotide Sequences) is an algorithm that iteratively identifies loci containing elevated densities of base substitutions while concurrently constructing a phylogeny based on the putative point mutations outside of these regions. Simulations demonstrate the algorithm generates highly accurate reconstructions under realistic models of short-term bacterial evolution, and can be run in only a few hours on alignments of hundreds of bacterial genome sequences.
222
223 **Running Gubbins**
224
225 To run Gubbins with default settings:
226
227 Supply a "fasta" genome alignment file and press execute.
228
229 **Other options**
230
231 **Advanced**
232
233 *Iterations*, (-i)
234
235 The maximum number of iterations to perform; the algorithm will stop earlier than this if it converges on the same tree in two successive iterations. Default is 5.
236
237 *Converge_method*, (-z)
238
239 Criteria to use to know when to halt iterations [weighted_robinson_foulds|robinson_foulds|recombination]. Default is weighted_robinson_foulds.
240
241 *Outgroup*, (-o)
242
243 The name of a sequence in the alignment on which to root the tree. A comma-separated list of names can be given if they form a clade.
244
245 **Really Advanced**
246
247 These options are here for completeness and you shouldn't need to change them from their defaults. You really need to know what you are doing before you use these.
248
249 *Tree builder*, (-t)
250
251 The algorithm to use in the construction of phylogenies in the analysis; can be ‘raxml’, to use RAxML, ‘fasttree’, to use FastTree, or ‘hybrid’, to use FastTree for the first iteration and RAxML in all subsequent iterations. Default is raxml.
252
253 *Filter Percentage*, (-f)
254
255 Filter out taxa with more than this percentage of missing data. Default is 25%.
256
257 *Minimum snps*, (-m)
258
259 The minimum number of base substitutions required to identify a recombination. Default is 3.
260
261
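262 The remaining options are *Minimum window size* (-a, default 100), *Maximum window size* (-b, default 10000) and *Remove identical sequences* (-d).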
263
264
265
266 **Output files**
267
268 * Recombination predictions in EMBL tab file format.
269
270
271 * Recombination predictions in GFF3 format
272
273
274 * Base substitution reconstruction in EMBL tab format.
275
276
277 * VCF file summarising the distribution of SNPs
278
279
280 * Per branch reporting of the base substitutions inside and outside recombination events.
281
282
283 * FASTA format alignment of filtered polymorphic sites used to generate the phylogeny in the final iteration.
284
285
286 * Phylip format alignment of filtered polymorphic sites used to generate the phylogeny in the final iteration.
287
288
289 * Final phylogenetic tree in newick format.
290
291 ]]></help>
292
293
294 <citations> <!-- type="doi" takes the bare DOI, without a "doi:" prefix. -->
295 <citation type="doi">10.1093/nar/gku1196</citation>
296 </citations>
297
298 </tool>