comparison ConcatPhyl.xml @ 0:b186cae246bd draft default tip

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author abims-sbr
date Fri, 01 Feb 2019 10:27:42 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b186cae246bd
1 <tool name="ConcatPhyl" id="concatphyl" version="2.0.2">
2
3 <description>
4 Concatenation and phylogeny
5 </description>
6
7 <macros>
8 <import>macros.xml</import>
9 </macros>
10
11 <requirements>
12 <expand macro="python_required" />
13 <requirement type="package" version="8.2.9">raxml</requirement>
14 </requirements>
15
16 <command><![CDATA[
17 #set $infiles_filter_assemblies = ""
18 #for $input_filter_assemblie in $input_filter_assemblies
19 ln -s '$input_filter_assemblie' '$input_filter_assemblie.element_identifier' &&
20 #set $infiles_filter_assemblies = $infiles_filter_assemblies + $input_filter_assemblie.element_identifier + ","
21 #end for
22 #set $infiles_filter_assemblies = $infiles_filter_assemblies[:-1]
23 ## Symlink every alignment under its dataset name and record that name in list_files, which is handed to the script below.
24 #for $input_alignment in $input_alignments
25 ln -s '$input_alignment' '$input_alignment.element_identifier' &&
26 echo '$input_alignment.element_identifier' >> list_files &&
27 #end for
28 ## Script arguments: comma-separated assembly file names, sequence type (nucleic|proteic), file listing the alignments.
29 python '$__tool_directory__/scripts/S01_concatenate.py'
30
31 '$infiles_filter_assemblies'
32
33 #if $format.format_run == "nucleic" :
34 nucleic
35 #elif $format.format_run == "proteic" :
36 proteic
37 #end if
38
39 list_files
40
41 > '$output' &&
42 ## Steps are chained with '&&' so that RAxML only starts when linking and concatenation succeeded.
43 raxmlHPC -n galaxy_run
44 #if $format.format_run == "nucleic" :
45 ##-q 05_partitions_gene_NUC
46 -s "03_Concatenation_nuc.phy"
47 -m $format.base_model
48 #elif $format.format_run == "proteic" :
49 ##-q 06_partitions_gene_AA
50 -s 02_Concatenation_aa.phy
51 -m $format.base_model$format.aa_search_matrix
52 #end if
53
54 -p $random_seed
55 ## -N and -x are emitted only when a run count or a bootstopping criterion is set; the bootstopping criterion takes precedence.
56 #if $number_of_runs !="" and $number_of_runs_bootstop =="":
57 -N $number_of_runs
58 -x $rapid_bootstrap_random_seed
59 #elif ($number_of_runs !="" and $number_of_runs_bootstop !="") or ($number_of_runs =="" and $number_of_runs_bootstop !=""):
60 -N $number_of_runs_bootstop
61 -x $rapid_bootstrap_random_seed
62 #end if
63
64 -f $search_algorithm
65
66 >> '$output'
67 ]]>
68 </command>
69
70 <inputs>
71
72 <param name="input_filter_assemblies" type="data" format="fasta" multiple="true" label="Files from Filter assemblies" />
73 <param name="input_alignments" type="data" format="fasta" multiple="true" label="Aligned files without indels" help="nucleic or proteic format according to the analysis you want to do below"/>
74
75 <conditional name="format">
76 <param name="format_run" type="select" label="Which format do you want to use for this tool (concatenation and RAxML run) ? ">
77 <option value="nucleic">Nucleic format</option>
78 <option value="proteic">Proteic format</option>
79 </param>
80 <!-- Nucleotide branch: base_model is passed unchanged to raxmlHPC as the -m value -->
81 <when value="nucleic">
82 <param name="base_model" type="select" label="Substitution Model">
83 <option value="GTRCAT">GTRCAT</option>
84 <option value="GTRCATI">GTRCATI</option>
85 <option value="GTRGAMMA" selected="true">GTRGAMMA</option>
86 <option value="GTRGAMMAI">GTRGAMMAI</option>
87 </param>
88 </when>
89 <!-- Protein branch: the command template concatenates base_model and aa_search_matrix into the -m value (e.g. PROTCATDAYHOFF) -->
90 <when value="proteic">
91 <param name="base_model" type="select" label="Substitution Model (-m)">
92 <option value="PROTCAT" selected="true">PROTCAT</option>
93 <option value="PROTCATI">PROTCATI</option>
94 <option value="PROTGAMMA">PROTGAMMA</option>
95 <option value="PROTGAMMAI">PROTGAMMAI</option>
96 </param>
97 <param name="aa_search_matrix" type="select" label="Matrix">
98 <option value="DAYHOFF" selected="true">DAYHOFF</option>
99 <option value="JTT">JTT</option>
100 <option value="WAG">WAG</option>
101 <option value="BLOSUM62">BLOSUM62</option>
102 </param>
103 </when>
104 </conditional>
105
106 <param name="random_seed" type="integer" value="1234567890" size="12" label="Random seed used for the parsimony inferences" />
107
108 <!-- ## (-N/#) number_of_runs is passed as -N only when number_of_runs_bootstop is empty; otherwise the selected bootstopping criterion is passed as -N instead. In both cases the command template also adds -x with rapid_bootstrap_random_seed. -->
109 <param name="number_of_runs" type="integer" size="8" value="100"
110 label="Number of runs" help="Specify the number of
111 alternative runs (-N|#) on distinct starting trees In combination
112 with the '-b' option will invoke a multiple boostrap analysis.
113 You can add the bootstopping criteria by choosing the autoMR,
114 autoMRE, autoMRE_IGN, or autoFC value in a menu below instead of
115 providing a number here. Bootstopping will only work in
116 combination with '-x' or '-b'."
117 optional="True" />
118 <param name="number_of_runs_bootstop" type="select" label="Use bootstopping criteria for number of runs" optional="True">
119 <option value="" selected="yes"></option>
120 <option value="autoMR">autoMR</option>
121 <option value="autoMRE">autoMRE</option>
122 <option value="autoMRE_IGN">autoMRE_IGN</option>
123 <option value="autoFC">autoFC</option>
124 </param>
125
126 <!-- ## (-f) The selected value is always passed to raxmlHPC as the -f argument. NOTE(review): option 'a' is the preselected one, while the label of 'd' says "(default)", presumably meaning RAxML's own default; confirm which wording is intended. -->
127 <param name="search_algorithm" type="select" label="Algorithm to execute" optional="True">
128 <option value="a" selected="true">Rapid bootstrap and best ML tree search (a)</option>
129 <option value="A">Compute marginal ancestral states (A)</option>
130 <option value="b">Draw bipartition information (b)</option>
131 <option value="c">Check if the alignment can be read (c)</option>
132 <option value="d">Hill-climbing ML Search (d) (default)</option>
133 <option value="e">Optimize GAMMA/GAMMAI model/branches (e)</option>
134 <option value="g">Compute per-site log likelihoods for -z trees (g)</option>
135 <option value="h">Compute log likelihood test for -t / -z trees (h)</option>
136 <option value="j">Generate bootstrapped alignment files (j)</option>
137 <option value="J">Compute SH-like support values for the -t tree (J)</option>
138 <option value="m">Compare bipartitions between -t and -z trees (m)</option>
139 <option value="n">Compute log likelihood score for -z trees (n)</option>
140 <option value="o">Use old slower search algorithm (o)</option>
141 <option value="p">Stepwise MP addition of new sequences (p)</option>
142 <option value="q">Fast quartet calculator (q)</option>
143 <option value="r">Compute pairwise RF distances in -z trees (r)</option>
144 <option value="s">Split a multi-gene alignment (s)</option>
145 <option value="S">Compute site-specific placement bias (S)</option>
146 <option value="t">Randomized tree searches on a fixed starting tree (t)</option>
147 <option value="T">Final optimization of a ML tree from a bootstrap (T)</option>
148 <option value="u">Morphological weight calibration using ML on a -t tree (u)</option>
149 <option value="v">Classify environmental sequences (v)</option>
150 <option value="w">Compute ELW-test on -z trees (w)</option>
151 <option value="x">Compute GAMMA model pair-wise ML distances on a tree (x)</option>
152 <option value="y">Classify environmental sequences into a reference tree (y)</option>
153 </param>
154
155 <!-- ## (-q) NOTE: this input is not referenced by the command template; the only -q lines there are commented out, so a file supplied here currently has no effect on the RAxML call. -->
156 <param name="multiple_model" format="txt" type="data" label="Multiple model assignment to alignment partitions" optional="True" help="Specify the file name which contains the assignment of models to alignment partitions for multiple models of substitution. For the syntax of this file please consult the manual." />
157
158 <!-- ## (-x) Emitted by the command template together with -N, i.e. only when a number of runs or a bootstopping criterion is set. -->
159 <param name="rapid_bootstrap_random_seed" type="integer" value='12345' size="7" label="Rapid bootstrapping random seed" optional="True" help="Specify a random seed and turn on rapid bootstrapping. CAUTION: unlike in version 7.0.4 RAxML will conduct rapid BS replicates under the model of rate heterogeneity you specified via '-m' and not by default under CAT." />
160
161 <param name="out" type="select" label="What format of file do you want for your output (concatenation of the sequences) ? ">
162 <option value="nothing">No output</option>
163 <option value="fasta" selected="true">Fasta format</option>
164 <option value="phylip">Phylip format</option>
165 <option value="nexus">Nexus format</option>
166 </param>
167
168 <param name="raxml1" type="boolean" checked="True" label="Do you want the output of RAxML : best tree ? " />
169 <param name="raxml3" type="boolean" label="Do you want the output of RAxML : bi-partition ? " />
170 <param name="raxml4" type="boolean" label="Do you want the output of RAxML : bootstrap ? " help="Only if the option 'rapid bootsptrap' is chosen. When you don't want to choose your options, this output is accessible"/>
171
172 </inputs>
173
174 <outputs>
175 <data name="output" format="txt" label="Phylogeny"/>
176 <!-- Concatenated alignments: one output per sequence type and file format, kept only when both the 'format_run' and the 'out' selections match its filter. -->
177 <data name="out_fasta_aa" format="fasta" label="Phylogeny_concatenation_fasta_aa" from_work_dir="02_Concatenation_aa.fas">
178 <filter>format['format_run'] == "proteic" and out == "fasta"</filter>
179 </data>
180
181 <data name="out_phylip_aa" format="phylip" label="Phylogeny_concatenation_phylip_aa" from_work_dir="02_Concatenation_aa.phy">
182 <filter>format['format_run'] == "proteic" and out == "phylip"</filter>
183 </data>
184
185 <data name="out_nexus_aa" format="nexus" label="Phylogeny_concatenation_nexus_aa" from_work_dir="02_Concatenation_aa.nex">
186 <filter>format['format_run'] == "proteic" and out == "nexus"</filter>
187 </data>
188
189 <data name="out_fasta_nuc" format="fasta" label="Phylogeny_concatenation_fasta_nuc" from_work_dir="03_Concatenation_nuc.fas">
190 <filter>format['format_run'] == "nucleic" and out == "fasta"</filter>
191 </data>
192
193 <data name="out_phylip_nuc" format="phylip" label="Phylogeny_concatenation_phylip_nuc" from_work_dir="03_Concatenation_nuc.phy">
194 <filter>format['format_run'] == "nucleic" and out == "phylip"</filter>
195 </data>
196
197 <data name="out_nexus_nuc" format="nexus" label="Phylogeny_concatenation_nexus_nuc" from_work_dir="03_Concatenation_nuc.nex">
198 <filter>format['format_run'] == "nucleic" and out == "nexus"</filter>
199 </data>
200 <!-- RAxML result files picked up from the working directory; their names end in the run name 'galaxy_run' passed via -n in the command. Each is kept only when its checkbox is ticked. -->
201 <data name="out_raxml1" format="nhx" label="Phylogeny_RAxML_BestTree" from_work_dir="RAxML_bestTree.galaxy_run">
202 <filter>raxml1 == True</filter>
203 </data>
204
205 <data name="out_raxml3" format="nhx" label="Phylogeny_RAxML_BiPartition" from_work_dir="RAxML_bipartitions.galaxy_run">
206 <filter>raxml3 == True</filter>
207 </data>
208
209 <data name="out_raxml4" format="txt" label="Phylogeny_RAxML_BootStrap" from_work_dir="RAxML_bootstrap.galaxy_run">
210 <filter>raxml4 == True</filter>
211 </data>
212 </outputs>
213
214 <tests>
215 <test>
216 <param name="input_filter_assemblies" ftype="fasta" value="input_filter_assemblies/AcAcaud_trinity.fasta,input_filter_assemblies/AmAmphi_trinity.fasta,input_filter_assemblies/ApApomp_trinity.fasta,input_filter_assemblies/PgPgras_trinity.fasta,input_filter_assemblies/PhPhess_trinity.fasta,input_filter_assemblies/ThThelep_trinity.fasta" />
217 <param name="input_alignments" ftype="fasta" value="input_from_CDS_Search/orthogroup_17_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_147_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_183_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_334_sp3_sp3.fasta" />
218 <conditional name="format">
219 <param name="format_run" value="nucleic" />
220 <param name="base_model" value="GTRGAMMA" />
221 </conditional>
222 <param name="random_seed" value="1234567890" />
223 <param name="number_of_runs" value="100" />
224 <param name="number_of_runs_bootstop" value="" />
225 <param name="search_algorithm" value="d" />
226 <!-- <param name="multiple_model" value="" /> -->
227 <param name="rapid_bootstrap_random_seed" value="123456789" />
228 <param name="out" value="nothing" />
229 <param name="raxml1" value="True" />
230 <param name="raxml3" value="True" />
231 <param name="raxml4" value="True" />
232 <output name="out_raxml4">
233 <assert_contents>
234 <has_text text="((Pg,(Am,Th)),(Ph,Ap),Ac);"/>
235 <has_text text="((Th,(Pg,Am)),(Ph,Ap),Ac);"/>
236 <has_text text="((Ph,Ap),(Am,(Pg,Th)),Ac);"/>
237 </assert_contents>
238 </output>
239 </test>
240
241 <test>
242 <param name="input_filter_assemblies" ftype="fasta" value="input_filter_assemblies/AcAcaud_trinity.fasta,input_filter_assemblies/AmAmphi_trinity.fasta,input_filter_assemblies/ApApomp_trinity.fasta,input_filter_assemblies/PgPgras_trinity.fasta,input_filter_assemblies/PhPhess_trinity.fasta,input_filter_assemblies/ThThelep_trinity.fasta" />
243 <param name="input_alignments" ftype="fasta" value="input_from_CDS_Search/orthogroup_17_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_147_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_183_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_334_sp3_sp3.fasta" />
244 <conditional name="format">
245 <param name="format_run" value="nucleic" />
246 <param name="base_model" value="GTRGAMMA" />
247 </conditional>
248 <param name="random_seed" value="1234567890" />
249 <param name="number_of_runs" value="100" />
250 <param name="number_of_runs_bootstop" value="" />
251 <param name="search_algorithm" value="a" />
252 <param name="rapid_bootstrap_random_seed" value="1234567890" />
253 <param name="out" value="nothing" />
254 <param name="raxml1" value="True" />
255 <param name="raxml3" value="True" />
256 <param name="raxml4" value="True" />
257 <output name="out_raxml1" value="RAxML_bestTree.nwk"/>
258 <output name="out_raxml3" value="RAxML_bipartitions.nwk"/>
259 </test>
260
261 <test>
262 <param name="input_filter_assemblies" ftype="fasta" value="input_filter_assemblies/AcAcaud_trinity.fasta,input_filter_assemblies/AmAmphi_trinity.fasta,input_filter_assemblies/ApApomp_trinity.fasta,input_filter_assemblies/PgPgras_trinity.fasta,input_filter_assemblies/PhPhess_trinity.fasta,input_filter_assemblies/ThThelep_trinity.fasta" />
263 <param name="input_alignments" ftype="fasta" value="input_from_CDS_Search/orthogroup_17_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_147_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_183_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_334_sp3_sp3.fasta" />
264 <conditional name="format">
265 <param name="format_run" value="nucleic" />
266 <param name="base_model" value="GTRGAMMA" />
267 </conditional>
268 <param name="random_seed" value="1234567890" />
269 <param name="number_of_runs" value="100" />
270 <param name="number_of_runs_bootstop" value="autoMR" />
271 <param name="search_algorithm" value="a" />
272 <param name="rapid_bootstrap_random_seed" value="1234567890" />
273 <param name="out" value="nothing" />
274 <param name="raxml1" value="True" />
275 <param name="raxml3" value="True" />
276 <param name="raxml4" value="True" />
277 <output name="out_raxml1" value="RAxML_bestTree_test3.nwk"/>
278 <output name="out_raxml3" value="RAxML_bipartitions_test3.nwk"/>
279 </test>
280 </tests>
281
282 <help>
283
284 @HELP_AUTHORS@
285
286 <![CDATA[
287
288 **Description**
289
290 This tool takes files containing fasta sequences (from the CDS_Search tool of the AdaptSearch suite), concatenates them and runs RAxML to build a phylogeny.
291
292 .. class:: infomark
293
294 full RAxML manual here_
295
296 .. _here: https://sco.h-its.org/exelixis/resource/download/NewManual.pdf
297
298 --------
299
300 **Parameters**
301
302 - The choice of the format sequences is possible : **proteic** or **nucleic**
303
304 - Several RAxML parameters can be set :
305
306 - Substitution model (-m) : Model of Binary (Morphological), Nucleotide, Multi-state, or Amino-Acid substitution
307 Default : GTRGAMMA (nucleic), PROTCAT (proteic).
308
309 - Matrix : AA substitution model (when proteic inputs)
310 Default : DAYHOFF
311
312 - random seed : Specifies a random number seed for the parsimony inferences. For all options/algorithms in RAxML that require some sort of randomization, this option must be specified. Make sure to pass different random number seeds to RAxML and not only 12345.
313
314 - Number of runs (-N) : Specifies the number of alternative runs.
315 By default it's an integer of value 100.
316
317 - Use bootstopping criteria for number of runs :
318 If selected, overrides the number of runs and uses the chosen bootstopping criterion instead.
319
320 - Algorithm to execute (-f) : selects which algorithm RAxML executes.
321 Default : Rapid bootstrap and best ML tree search (-f a).
322
323 - Multiple model assignment to alignment partitions (-q) : an optional parameter. Specifies the file which contains the assignment of models to alignment partitions for multiple models of substitution. For the syntax of this file please consult the manual.
324 This option allows you to specify the regions of your alignment for which an individual model of nucleotide substitution should be estimated. This will typically be useful to infer trees for long multi-gene alignments.
325
326 - Rapid bootstrapping random seed (-x) : Specify an integer number (random seed) and turn on rapid bootstrapping.
327 In addition to the best tree search.
328 By default, this option is chosen.
329
330 --------
331
332 **Inputs**
333
334 - Files from Filter Assemblies : a set of fasta files (one file per species), e.g. the outputs of the first tool of the AdaptSearch suite.
335 Used to retrieve all the species names.
336
337 - Alignment files without indels : a set of fasta files with aligned sequences (with the same species as in the previous parameter), e.g. the outputs of the CDS_Search tool of the AdaptSearch suite.
338
339 --------
340
341 **Outputs**
342
343 This tool produces the following files :
344
345 - Phylogeny :
346 the general output. It gives the information about the concatenation (statistics) and the RAxML run.
347
348 - Phylogeny_concatenation_fasta_aa :
349 contains the sequences concatenated in fasta format when you choose the option proteic.
350
351 - Phylogeny_concatenation_phylip_aa :
352 contains the sequences concatenated in phylip format when you choose the option proteic.
353
354 - Phylogeny_concatenation_nexus_aa :
355 contains the sequences concatenated in nexus format when you choose the option proteic.
356
357 - Phylogeny_concatenation_fasta_nuc :
358 contains the sequences concatenated in fasta format when you choose the option nucleic.
359
360 - Phylogeny_concatenation_phylip_nuc :
361 contains the sequences concatenated in phylip format when you choose the option nucleic.
362 it's this output which is used for the RAxML run.
363
364 - Phylogeny_concatenation_nexus_nuc :
365 contains the sequences concatenated in nexus format when you choose the option nucleic.
366
367 - Phylogeny_RAxML_BestTree :
368 the output of RAxML run which contains the Best Tree found.
369
370 - Phylogeny_RAxML_BiPartitionBranchLabel :
371 the output of RAxML run which contains the Best Tree found with supported values as branch labels.
372
373 - Phylogeny_RAxML_BiPartition :
374 the output of RAxML run which contains the Best Tree found with supported values.
375
376 - Phylogeny_RAxML_BootStrap :
377 the output of RAxML run which contains all the bootstrapped trees. The number of bootstrapped trees depends on the option -N (number of runs).
378
379 ---------
380
381 **The AdaptSearch Pipeline**
382
383 .. image:: adaptsearch_picture_helps.png
384
385 ---------
386
387 Changelog
388 ---------
389
390 **Version 2.0 - 06/07/2017**
391
392 - NEW: Replace the zip between tools by Dataset Collection
393
394 **Version 1.0 - 13/04/2017**
395
396 - Add functional test with planemo
397 - Planemo test with conda dependencies for raxml and python
398 - Scripts renamed + symlinks to the directory 'scripts'
399
400 ]]>
401
402 </help>
403
404 <citations>
405 <citation type="doi">10.1093/bioinformatics/btu033</citation>
406 </citations>
407
408 </tool>