comparison orthofinder_only_groups.xml @ 0:bfb20dbe1309 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/orthofinder commit 2da91121887cc148ff398ddc2f56142490a8e22f
author iuc
date Tue, 24 Oct 2017 06:40:40 -0400
parents
children 918d141a166b
comparison
equal deleted inserted replaced
-1:000000000000 0:bfb20dbe1309
1 <tool name="OrthoFinder OnlyGroups" id="orthofinder_onlygroups" version="1.1.4">
2 <description>finds orthogroups in a set of proteomes</description>
3 <requirements> <!-- orthofinder package pinned to 1.1.4, the same version string as the version attribute of the tool element -->
4 <requirement type="package" version="1.1.4">orthofinder</requirement>
5 </requirements>
6 <command>
7 <![CDATA[
8 ## prepare inputs: symlink each dataset into the job directory under its element identifier (the comma-joined infiles, infilesbl and infilesfa strings built below are not referenced again in this command)
9 #if $init.start=="fasta":
10 #set $infiles = ""
11 #for $input in $init.input_fasta
12 ln -s '$input' '$input.element_identifier' &&
13 #set $infiles = $infiles + str($input.element_identifier) + ","
14 #end for
15 #set $infiles = $infiles[:-1]
16 #elif $init.start=="blast":
17 #set $infilesbl = ""
18 #for $input in $init.input_blast_out
19 ln -s '$input' '$input.element_identifier' &&
20 #set $infilesbl = $infilesbl + str($input.element_identifier) + ","
21 #end for
22 #set $infilesbl = $infilesbl[:-1]
23 ## fasta files kept from the previous run, staged the same way as the blast files
24 #set $infilesfa = ""
25 #for $input in $init.input_blast_fa
26 ln -s '$input' '$input.element_identifier' &&
27 #set $infilesfa = $infilesfa + str($input.element_identifier) + ","
28 #end for
29 #set $infilesfa = $infilesfa[:-1]
30 ## orthofinder -b expects the ID tables under these exact names, so do not depend on how the datasets are called in the history; paths are quoted like the other links
31 ln -s '$init.specIDs' SpeciesIDs.txt &&
32 ln -s '$init.seqIDs' SequenceIDs.txt &&
33 #end if
34
35 ## start Orthofinder on the current directory: -f for fasta proteomes, -b for pre-computed blast results
36 orthofinder
37 #if $init.start=="fasta":
38 -f .
39 #elif $init.start=="blast":
40 -b .
41 #end if
42 ## -I: MCL inflation; -og: stop after orthogroup inference; -t and -a: thread counts (per the OrthoFinder README), both taken from GALAXY_SLOTS with a fallback of 1
43 -I $I -og -t \${GALAXY_SLOTS:-1} -a \${GALAXY_SLOTS:-1} &&
44 ## gather the result files under ./results so that the from_work_dir paths of the outputs resolve
45 #if $init.start=="fasta":
46 mv Results_* results
47 #if $init.keepblastout=="yes":
48 && mkdir -p results/WorkingDirectory/blast results/WorkingDirectory/fa &&
49 mv results/WorkingDirectory/Blast* results/WorkingDirectory/blast/ &&
50 mv results/WorkingDirectory/*.fa results/WorkingDirectory/fa/
51 #end if
52 #elif $init.start=="blast":
53 mkdir results &&
54 mv *.csv results/ &&
55 mv Orthogroups.txt results/
56 #end if
57
58 ]]>
59 </command>
60 <inputs>
61 <!-- Control where Orthofinder starts: the selected branch decides which datasets the command template links -->
62 <conditional name="init">
63 <param name="start" type="select" label="Orthofinder starting point" help="OrthoFinder_OnlyGroups works in 2 steps. Choose 'From fasta proteomes' to run OrthoFinder_OnlyGroups from scratch and 'From blast results' if you have all the blast results from a previous OrthoFinder_OnlyGroups run.">
64 <option value="fasta" selected="true">From fasta proteomes</option>
65 <option value="blast">From blast results</option>
66 </param>
67 <!-- from scratch: proteomes plus a switch that keeps the intermediate blast files as extra outputs -->
68 <when value="fasta">
69 <param name="input_fasta" type="data" format="fasta" multiple="true" label="Select input fasta proteomes" help="One fasta file per species; species and sequences names in the results will remain the same as in the input files."/>
70 <param name="keepblastout" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Do you want to get the blast results ?" help="Used to re-run OrthoFinder_OnlyGroups from pre-computed blast results"/>
71 </when>
72 <!-- restart: the four kinds of files kept from a previous run -->
73 <when value="blast">
74 <param name="input_blast_out" type="data_collection" collection_type="list" format="txt" label="Select the pre-computed blast files" help="BlastX_Y.txt files from the blast output files of a previous OrthoFinder_OnlyGroups run." />
75 <param name="input_blast_fa" type="data_collection" collection_type="list" format="fasta" label="Select the fasta files" help="SpeciesX.fa files from the blast output files of a previous OrthoFinder_OnlyGroups run." />
76 <param name="specIDs" type="data" format="txt" label="Select the SpeciesIds file" help="SpeciesIDs.txt file from the blast output files of a previous OrthoFinder_OnlyGroups run."/>
77 <param name="seqIDs" type="data" format="txt" label="Select the SequenceIds file" help="SequenceIDs.txt file from the blast output files of a previous OrthoFinder_OnlyGroups run."/>
78 </when>
79 </conditional>
80 <!-- no name attribute here: the command template reads this value as $I, the name derived from the argument -->
81 <param argument="-I" type="float" value="1.5" label="Inflation parameter" help="Modify inflation parameter for MCL. Not recommended." />
82 </inputs>
83 <outputs>
84 <!-- Orthogroups results, picked up from the results/ directory that the command assembles -->
85 <data format="txt" name="orthogroups1" label="Orthogroups.txt" from_work_dir="results/Orthogroups.txt" />
86 <data format="csv" name="orthogroups2" label="Orthogroups.csv" from_work_dir="results/Orthogroups.csv" />
87 <data format="csv" name="specs_overlap" label="Orthogroups_SpeciesOverlaps.csv" from_work_dir="results/Orthogroups_SpeciesOverlaps.csv" />
88 <data format="csv" name="unassigned_genes" label="Orthogroups_UnassignedGenes.csv" from_work_dir="results/Orthogroups_UnassignedGenes.csv" />
89 <data format="csv" name="stat_overall" label="Statistics_Overall.csv" from_work_dir="results/Statistics_Overall.csv" />
90 <data format="csv" name="stat_specs" label="Statistics_PerSpecies.csv" from_work_dir="results/Statistics_PerSpecies.csv" />
91 <!-- the four outputs below are filtered: they exist only when starting from fasta with keepblastout enabled -->
92 <!-- working directory : blast outputs-->
93 <collection name="wdblast" type="list" label="Blast_outputs">
94 <discover_datasets pattern="__name_and_ext__" directory="results/WorkingDirectory/blast/" />
95 <filter>init['start']=="fasta" and init['keepblastout']</filter>
96 </collection>
97 <collection name="wdfasta" type="list" label="Fasta_from_blast" >
98 <discover_datasets pattern="__name_and_ext__" directory="results/WorkingDirectory/fa/" />
99 <filter>init['start']=="fasta" and init['keepblastout']</filter>
100 </collection>
101 <data format="txt" name="SpeciesIDs" label="SpeciesIDs.txt" from_work_dir="results/WorkingDirectory/SpeciesIDs.txt" >
102 <filter>init['start']=="fasta" and init['keepblastout']</filter>
103 </data>
104 <data format="txt" name="SequenceIDs" label="SequenceIDs.txt" from_work_dir="results/WorkingDirectory/SequenceIDs.txt" > <!-- label matches the real file name so the dataset can be fed back to a run from blast results -->
105 <filter>init['start']=="fasta" and init['keepblastout']</filter>
106 </data>
107 </outputs>
108 <tests>
109 <!-- test orthofinder -f -og without keeping the blast results; the inflation input is addressed by its real name I -->
110 <test>
111 <conditional name="init">
112 <param name="start" value="fasta" />
113 <param name="input_fasta" ftype="fasta" value="inputs/proteomes/Mycoplasma_agalactiae.faa,inputs/proteomes/Mycoplasma_gallisepticum.faa,inputs/proteomes/Mycoplasma_genitalium.faa,inputs/proteomes/Mycoplasma_hyopneumoniae.faa" />
114 <param name="keepblastout" value="no" />
115 </conditional>
116 <param name="I" value="1.5" />
117 <output name="specs_overlap">
118 <assert_contents>
119 <has_text text="Mycoplasma_agalactiae"/>
120 <has_text text="Mycoplasma_gallisepticum"/>
121 <has_text text="Mycoplasma_genitalium"/>
122 <has_text text="Mycoplasma_hyopneumoniae"/>
123 <has_n_columns n="5"/>
124 </assert_contents>
125 </output>
126 <output name="unassigned_genes">
127 <assert_contents>
128 <has_text text="Mycoplasma_agalactiae"/>
129 <has_text text="Mycoplasma_gallisepticum"/>
130 <has_text text="Mycoplasma_genitalium"/>
131 <has_text text="Mycoplasma_hyopneumoniae"/>
132 <has_n_columns n="5"/>
133 </assert_contents>
134 </output>
135 <output name="stat_overall">
136 <assert_contents>
137 <has_text text="Number of genes in orthogroups"/>
138 <has_text text="Number of unassigned genes"/>
139 <has_text text="Percentage of orthogroups"/>
140 <has_text text="Number of orthogroups"/>
141 <has_text text="Number of genes"/>
142 <has_text text="G50 (assigned genes)"/>
143 <has_text text="G50 (all genes)"/>
144 <has_text text="O50 (assigned genes)"/>
145 <has_text text="O50 (all genes)"/>
146 </assert_contents>
147 </output>
148 <output name="stat_specs">
149 <assert_contents>
150 <has_text text="Mycoplasma_agalactiae"/>
151 <has_text text="Mycoplasma_gallisepticum"/>
152 <has_text text="Mycoplasma_genitalium"/>
153 <has_text text="Mycoplasma_hyopneumoniae"/>
154 <has_text text="Number of genes per-species in orthogroup"/>
155 <has_text text="Percentage of orthogroups"/>
156 <has_text text="Number of orthogroups"/>
157 <has_text text="Number of genes"/>
158 <has_n_columns n="5"/>
159 </assert_contents>
160 </output>
161 </test>
162
163 <test> <!-- orthofinder -f -og with keepblastout=yes: also checks the ID files and the two working-directory collections -->
164 <conditional name="init">
165 <param name="start" value="fasta" />
166 <param name="input_fasta" ftype="fasta" value="inputs/proteomes/Mycoplasma_agalactiae.faa,inputs/proteomes/Mycoplasma_gallisepticum.faa,inputs/proteomes/Mycoplasma_genitalium.faa,inputs/proteomes/Mycoplasma_hyopneumoniae.faa" />
167 <param name="keepblastout" value="yes" />
168 </conditional>
169 <param name="I" value="1.5" />
170 <output name="specs_overlap">
171 <assert_contents>
172 <has_text text="Mycoplasma_agalactiae"/>
173 <has_text text="Mycoplasma_gallisepticum"/>
174 <has_text text="Mycoplasma_genitalium"/>
175 <has_text text="Mycoplasma_hyopneumoniae"/>
176 <has_n_columns n="5"/>
177 </assert_contents>
178 </output>
179 <output name="unassigned_genes">
180 <assert_contents>
181 <has_text text="Mycoplasma_agalactiae"/>
182 <has_text text="Mycoplasma_gallisepticum"/>
183 <has_text text="Mycoplasma_genitalium"/>
184 <has_text text="Mycoplasma_hyopneumoniae"/>
185 <has_n_columns n="5"/>
186 </assert_contents>
187 </output>
188 <output name="stat_overall">
189 <assert_contents>
190 <has_text text="Number of genes in orthogroups"/>
191 <has_text text="Number of unassigned genes"/>
192 <has_text text="Percentage of orthogroups"/>
193 <has_text text="Number of orthogroups"/>
194 <has_text text="Number of genes"/>
195 <has_text text="G50 (assigned genes)"/>
196 <has_text text="G50 (all genes)"/>
197 <has_text text="O50 (assigned genes)"/>
198 <has_text text="O50 (all genes)"/>
199 </assert_contents>
200 </output>
201 <output name="stat_specs">
202 <assert_contents>
203 <has_text text="Mycoplasma_agalactiae"/>
204 <has_text text="Mycoplasma_gallisepticum"/>
205 <has_text text="Mycoplasma_genitalium"/>
206 <has_text text="Mycoplasma_hyopneumoniae"/>
207 <has_text text="Number of genes per-species in orthogroup"/>
208 <has_text text="Percentage of orthogroups"/>
209 <has_text text="Number of orthogroups"/>
210 <has_text text="Number of genes"/>
211 <has_n_columns n="5"/>
212 </assert_contents>
213 </output>
214 <output name="SpeciesIDs" value="inputs/blastids/SpeciesIDs.txt" />
215 <output name="SequenceIDs" value="inputs/blastids/SequenceIDs.txt" />
216 <output_collection name="wdfasta" type="list" count="4"/>
217 <output_collection name="wdblast" type="list" count="16"/>
218 </test>
219
220 <!-- test orthofinder -b -og: restart from the 16 blast files, 4 fasta files and 2 ID files of a previous run; the inflation input is addressed by its real name I -->
221 <test>
222 <conditional name="init">
223 <param name="start" value="blast" />
224 <param name="input_blast_out">
225 <collection type="list">
226 <element name="Blast0_0.txt" value="inputs/blastout/Blast0_0.txt"/>
227 <element name="Blast0_1.txt" value="inputs/blastout/Blast0_1.txt"/>
228 <element name="Blast0_2.txt" value="inputs/blastout/Blast0_2.txt"/>
229 <element name="Blast0_3.txt" value="inputs/blastout/Blast0_3.txt"/>
230 <element name="Blast1_0.txt" value="inputs/blastout/Blast1_0.txt"/>
231 <element name="Blast1_1.txt" value="inputs/blastout/Blast1_1.txt"/>
232 <element name="Blast1_2.txt" value="inputs/blastout/Blast1_2.txt"/>
233 <element name="Blast1_3.txt" value="inputs/blastout/Blast1_3.txt"/>
234 <element name="Blast2_0.txt" value="inputs/blastout/Blast2_0.txt"/>
235 <element name="Blast2_1.txt" value="inputs/blastout/Blast2_1.txt"/>
236 <element name="Blast2_2.txt" value="inputs/blastout/Blast2_2.txt"/>
237 <element name="Blast2_3.txt" value="inputs/blastout/Blast2_3.txt"/>
238 <element name="Blast3_0.txt" value="inputs/blastout/Blast3_0.txt"/>
239 <element name="Blast3_1.txt" value="inputs/blastout/Blast3_1.txt"/>
240 <element name="Blast3_2.txt" value="inputs/blastout/Blast3_2.txt"/>
241 <element name="Blast3_3.txt" value="inputs/blastout/Blast3_3.txt"/>
242 </collection>
243 </param>
244 <param name="input_blast_fa">
245 <collection type="list">
246 <element name="Species0.fa" value="inputs/blastfa/Species0.fa"/>
247 <element name="Species1.fa" value="inputs/blastfa/Species1.fa"/>
248 <element name="Species2.fa" value="inputs/blastfa/Species2.fa"/>
249 <element name="Species3.fa" value="inputs/blastfa/Species3.fa"/>
250 </collection>
251 </param>
252 <param name="specIDs" ftype="txt" value="inputs/blastids/SpeciesIDs.txt"/>
253 <param name="seqIDs" ftype="txt" value="inputs/blastids/SequenceIDs.txt"/>
254 </conditional>
255 <param name="I" value="1.5" />
256 <output name="orthogroups1" value="results_fromblast/Orthogroups.txt"/>
257 <output name="orthogroups2" value="results_fromblast/Orthogroups.csv"/>
258 <output name="specs_overlap" value="results_fromblast/Orthogroups_SpeciesOverlaps.csv"/>
259 <output name="unassigned_genes" value="results_fromblast/Orthogroups_UnassignedGenes.csv"/>
260 <output name="stat_overall" value="results_fromblast/Statistics_Overall.csv" lines_diff="2"/>
261 <output name="stat_specs" value="results_fromblast/Statistics_PerSpecies.csv"/>
262 </test>
263 </tests>
264 <help>
265 ======================
266 OrthoFinder OnlyGroups
267 ======================
268
269 Full readme at https://github.com/davidemms/OrthoFinder/blob/master/README.md
270 Summary sketch at https://github.com/davidemms/OrthoFinder/blob/master/OrthoFinder-options.pdf
271
272 OrthoFinder is a fast, accurate and comprehensive analysis tool for comparative genomics. It finds orthologues and orthogroups, infers gene trees for all orthogroups and infers a rooted species tree for the species being analysed. OrthoFinder also provides comprehensive statistics for comparative genomic analyses. OrthoFinder is simple to use and all you need to run it is a set of protein sequence files (one per species) in FASTA format (Emms, D.M. and Kelly, S., 2015).
273
274 .. class:: infomark
275
276 This Galaxy tool implements the first part of the OrthoFinder program, i.e. the clustering of genes into orthogroups.
277
278 If you have already run OrthoFinder, the tool allows you to re-run the analysis from the pre-computed blast results.
279
280 -----------
281 Input files
282 -----------
283 - When using the "from fasta" option (i.e. OrthoFinder from scratch): the input files are a set of proteomes in fasta format (one file per species). Choose this option if you have no OrthoFinder results yet.
284 - When using the "from blast results" option: the input files are all of the following files from a previous OrthoFinder run (these files are only available if you chose to keep them when launching that run):
285 - A dataset collection / multiple datasets for the blast outputs
286 - A dataset collection / multiple datasets for .fa files
287 - The SpeciesIDs.txt file
288 - The SequenceIDs.txt file
289
290 </help>
291 <citations> <!-- presumably the OrthoFinder paper that the help cites as Emms and Kelly, 2015 - TODO confirm against the DOI -->
292 <citation type="doi">10.1186/s13059-015-0721-2</citation>
293 </citations>
294 </tool>