Mercurial > repos > iuc > orthofinder_onlygroups
comparison orthofinder_only_groups.xml @ 0:bfb20dbe1309 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/orthofinder commit 2da91121887cc148ff398ddc2f56142490a8e22f
author | iuc |
---|---|
date | Tue, 24 Oct 2017 06:40:40 -0400 |
parents | |
children | 918d141a166b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bfb20dbe1309 |
---|---|
1 <tool name="OrthoFinder OnlyGroups" id="orthofinder_onlygroups" version="1.1.4"> | |
2 <description>finds orthogroups in a set of proteomes</description> | |
3 <requirements> | |
4 <requirement type="package" version="1.1.4">orthofinder</requirement> | |
5 </requirements> | |
6 <command> | |
7 <![CDATA[ | |
8 ## prepare inputs: symlink the selected datasets into the working directory | |
9 #if $init.start=="fasta": | |
10 #set $infiles = "" | |
11 #for $input in $init.input_fasta | |
12 ln -s '$input' '$input.element_identifier' && | |
13 #set $infiles = $infiles + str($input.element_identifier) + "," | |
14 #end for | |
15 #set $infiles = $infiles[:-1] | |
16 #elif $init.start=="blast": | |
17 #set $infilesbl = "" | |
18 #for $input in $init.input_blast_out | |
19 ln -s '$input' '$input.element_identifier' && | |
20 #set $infilesbl = $infilesbl + str($input.element_identifier) + "," | |
21 #end for | |
22 #set $infilesbl = $infilesbl[:-1] | |
23 | |
24 #set $infilesfa = "" | |
25 #for $input in $init.input_blast_fa | |
26 ln -s '$input' '$input.element_identifier' && | |
27 #set $infilesfa = $infilesfa + str($input.element_identifier) + "," | |
28 #end for | |
29 #set $infilesfa = $infilesfa[:-1] | |
30 ## link the ID files under fixed, quoted names so the run does not depend on how the datasets are named in the history | |
31 ln -s '$init.specIDs' 'SpeciesIDs.txt' && | |
32 ln -s '$init.seqIDs' 'SequenceIDs.txt' && | |
33 #end if | |
34 | |
35 ## start Orthofinder in orthogroups-only mode (-og), from fasta files (-f) or from blast results (-b) | |
36 orthofinder | |
37 #if $init.start=="fasta": | |
38 -f . | |
39 #elif $init.start=="blast": | |
40 -b . | |
41 #end if | |
42 | |
43 -I $I -og -t \${GALAXY_SLOTS:-1} -a \${GALAXY_SLOTS:-1} && | |
44 | |
45 #if $init.start=="fasta": | |
46 mv Results_* results | |
47 #if $init.keepblastout=="yes": | |
48 && mkdir -p results/WorkingDirectory/blast results/WorkingDirectory/fa && | |
49 mv results/WorkingDirectory/Blast* results/WorkingDirectory/blast/ && | |
50 mv results/WorkingDirectory/*.fa results/WorkingDirectory/fa/ | |
51 #end if | |
52 #elif $init.start=="blast": | |
53 mkdir results && | |
54 mv *.csv results/ && | |
55 mv Orthogroups.txt results/ | |
56 #end if | |
57 | |
58 ]]> | |
59 </command> | |
60 <inputs> | |
61 <!-- Control where Orthofinder starts: from fasta proteomes, or from the blast files kept from a previous run --> | |
62 <conditional name="init"> | |
63 <param name="start" type="select" label="Orthofinder starting point" help="OrthoFinder_OnlyGroups works in 2 steps. Choose 'From fasta proteomes' to run OrthoFinder_OnlyGroups from scratch and 'From blast results' if you have all the blast results from a previous OrthoFinder_OnlyGroups run."> | |
64 <option value="fasta" selected="true">From fasta proteomes</option> | |
65 <option value="blast">From blast results</option> | |
66 </param> | |
67 | |
68 <when value="fasta"> | |
69 <param name="input_fasta" type="data" format="fasta" multiple="true" label="Select input fasta proteomes" help="One fasta file per species; species and sequence names in the results will remain the same as in the input files."/> | |
70 <param name="keepblastout" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Do you want to get the blast results ?" help="Used to re-run OrthoFinder_OnlyGroups from pre-computed blast results"/> | |
71 </when> | |
72 | |
73 <when value="blast"> | |
74 <param name="input_blast_out" type="data_collection" collection_type="list" format="txt" label="Select the pre-computed blast files" help="BlastX_Y.txt files from the blast output files of a previous OrthoFinder_OnlyGroups run." /> | |
75 <param name="input_blast_fa" type="data_collection" collection_type="list" format="fasta" label="Select the fasta files" help="SpeciesX.fa files from the blast output files of a previous OrthoFinder_OnlyGroups run." /> | |
76 <param name="specIDs" type="data" format="txt" label="Select the SpeciesIds file" help="SpeciesIDs.txt file from the blast output files of a previous OrthoFinder_OnlyGroups run."/> | |
77 <param name="seqIDs" type="data" format="txt" label="Select the SequencesIds file" help="SequenceIDs.txt file from the blast output files of a previous OrthoFinder_OnlyGroups run."/> | |
78 </when> | |
79 </conditional> | |
80 | |
81 <param argument="-I" type="float" value="1.5" label="Inflation parameter" help="Modify inflation parameter for MCL. Not recommended." /> | |
82 </inputs> | |
83 <outputs> | |
84 <!-- Orthogroups results --> | |
85 <data format="txt" name="orthogroups1" label="Orthogroups.txt" from_work_dir="results/Orthogroups.txt" /> | |
86 <data format="csv" name="orthogroups2" label="Orthogroups.csv" from_work_dir="results/Orthogroups.csv" /> | |
87 <data format="csv" name="specs_overlap" label="Orthogroups_SpeciesOverlaps.csv" from_work_dir="results/Orthogroups_SpeciesOverlaps.csv" /> | |
88 <data format="csv" name="unassigned_genes" label="Orthogroups_UnassignedGenes.csv" from_work_dir="results/Orthogroups_UnassignedGenes.csv" /> | |
89 <data format="csv" name="stat_overall" label="Statistics_Overall.csv" from_work_dir="results/Statistics_Overall.csv" /> | |
90 <data format="csv" name="stat_specs" label="Statistics_PerSpecies.csv" from_work_dir="results/Statistics_PerSpecies.csv" /> | |
91 | |
92 <!-- working directory: blast outputs, fasta files and ID files, produced only when starting from fasta with keepblastout enabled --> | |
93 <collection name="wdblast" type="list" label="Blast_outputs"> | |
94 <discover_datasets pattern="__name_and_ext__" directory="results/WorkingDirectory/blast/" /> | |
95 <filter>init['start']=="fasta" and init['keepblastout']</filter> | |
96 </collection> | |
97 <collection name="wdfasta" type="list" label="Fasta_from_blast" > | |
98 <discover_datasets pattern="__name_and_ext__" directory="results/WorkingDirectory/fa/" /> | |
99 <filter>init['start']=="fasta" and init['keepblastout']</filter> | |
100 </collection> | |
101 <data format="txt" name="SpeciesIDs" label="SpeciesIDs.txt" from_work_dir="results/WorkingDirectory/SpeciesIDs.txt" > | |
102 <filter>init['start']=="fasta" and init['keepblastout']</filter> | |
103 </data> | |
104 <data format="txt" name="SequenceIDs" label="SequenceIDs.txt" from_work_dir="results/WorkingDirectory/SequenceIDs.txt" > | |
105 <filter>init['start']=="fasta" and init['keepblastout']</filter> | |
106 </data> | |
107 </outputs> | |
108 <tests> | |
109 <!-- test orthofinder -f -og --> | |
110 <test> | |
111 <conditional name="init"> | |
112 <param name="start" value="fasta" /> | |
113 <param name="input_fasta" ftype="fasta" value="inputs/proteomes/Mycoplasma_agalactiae.faa,inputs/proteomes/Mycoplasma_gallisepticum.faa,inputs/proteomes/Mycoplasma_genitalium.faa,inputs/proteomes/Mycoplasma_hyopneumoniae.faa" /> | |
114 <param name="keepblastout" value="no" /> | |
115 </conditional> | |
116 <param name="I" value="1.5" /> | |
117 <output name="specs_overlap"> | |
118 <assert_contents> | |
119 <has_text text="Mycoplasma_agalactiae"/> | |
120 <has_text text="Mycoplasma_gallisepticum"/> | |
121 <has_text text="Mycoplasma_genitalium"/> | |
122 <has_text text="Mycoplasma_hyopneumoniae"/> | |
123 <has_n_columns n="5"/> | |
124 </assert_contents> | |
125 </output> | |
126 <output name="unassigned_genes"> | |
127 <assert_contents> | |
128 <has_text text="Mycoplasma_agalactiae"/> | |
129 <has_text text="Mycoplasma_gallisepticum"/> | |
130 <has_text text="Mycoplasma_genitalium"/> | |
131 <has_text text="Mycoplasma_hyopneumoniae"/> | |
132 <has_n_columns n="5"/> | |
133 </assert_contents> | |
134 </output> | |
135 <output name="stat_overall"> | |
136 <assert_contents> | |
137 <has_text text="Number of genes in orthogroups"/> | |
138 <has_text text="Number of unassigned genes"/> | |
139 <has_text text="Percentage of orthogroups"/> | |
140 <has_text text="Number of orthogroups"/> | |
141 <has_text text="Number of genes"/> | |
142 <has_text text="G50 (assigned genes)"/> | |
143 <has_text text="G50 (all genes)"/> | |
144 <has_text text="O50 (assigned genes)"/> | |
145 <has_text text="O50 (all genes)"/> | |
146 </assert_contents> | |
147 </output> | |
148 <output name="stat_specs"> | |
149 <assert_contents> | |
150 <has_text text="Mycoplasma_agalactiae"/> | |
151 <has_text text="Mycoplasma_gallisepticum"/> | |
152 <has_text text="Mycoplasma_genitalium"/> | |
153 <has_text text="Mycoplasma_hyopneumoniae"/> | |
154 <has_text text="Number of genes per-species in orthogroup"/> | |
155 <has_text text="Percentage of orthogroups"/> | |
156 <has_text text="Number of orthogroups"/> | |
157 <has_text text="Number of genes"/> | |
158 <has_n_columns n="5"/> | |
159 </assert_contents> | |
160 </output> | |
161 </test> | |
162 | |
163 <test> | |
164 <conditional name="init"> | |
165 <param name="start" value="fasta" /> | |
166 <param name="input_fasta" ftype="fasta" value="inputs/proteomes/Mycoplasma_agalactiae.faa,inputs/proteomes/Mycoplasma_gallisepticum.faa,inputs/proteomes/Mycoplasma_genitalium.faa,inputs/proteomes/Mycoplasma_hyopneumoniae.faa" /> | |
167 <param name="keepblastout" value="yes" /> | |
168 </conditional> | |
169 <param name="I" value="1.5" /> | |
170 <output name="specs_overlap"> | |
171 <assert_contents> | |
172 <has_text text="Mycoplasma_agalactiae"/> | |
173 <has_text text="Mycoplasma_gallisepticum"/> | |
174 <has_text text="Mycoplasma_genitalium"/> | |
175 <has_text text="Mycoplasma_hyopneumoniae"/> | |
176 <has_n_columns n="5"/> | |
177 </assert_contents> | |
178 </output> | |
179 <output name="unassigned_genes"> | |
180 <assert_contents> | |
181 <has_text text="Mycoplasma_agalactiae"/> | |
182 <has_text text="Mycoplasma_gallisepticum"/> | |
183 <has_text text="Mycoplasma_genitalium"/> | |
184 <has_text text="Mycoplasma_hyopneumoniae"/> | |
185 <has_n_columns n="5"/> | |
186 </assert_contents> | |
187 </output> | |
188 <output name="stat_overall"> | |
189 <assert_contents> | |
190 <has_text text="Number of genes in orthogroups"/> | |
191 <has_text text="Number of unassigned genes"/> | |
192 <has_text text="Percentage of orthogroups"/> | |
193 <has_text text="Number of orthogroups"/> | |
194 <has_text text="Number of genes"/> | |
195 <has_text text="G50 (assigned genes)"/> | |
196 <has_text text="G50 (all genes)"/> | |
197 <has_text text="O50 (assigned genes)"/> | |
198 <has_text text="O50 (all genes)"/> | |
199 </assert_contents> | |
200 </output> | |
201 <output name="stat_specs"> | |
202 <assert_contents> | |
203 <has_text text="Mycoplasma_agalactiae"/> | |
204 <has_text text="Mycoplasma_gallisepticum"/> | |
205 <has_text text="Mycoplasma_genitalium"/> | |
206 <has_text text="Mycoplasma_hyopneumoniae"/> | |
207 <has_text text="Number of genes per-species in orthogroup"/> | |
208 <has_text text="Percentage of orthogroups"/> | |
209 <has_text text="Number of orthogroups"/> | |
210 <has_text text="Number of genes"/> | |
211 <has_n_columns n="5"/> | |
212 </assert_contents> | |
213 </output> | |
214 <output name="SpeciesIDs" value="inputs/blastids/SpeciesIDs.txt" /> | |
215 <output name="SequenceIDs" value="inputs/blastids/SequenceIDs.txt" /> | |
216 <output_collection name="wdfasta" type="list" count="4"/> | |
217 <output_collection name="wdblast" type="list" count="16"/> | |
218 </test> | |
219 | |
220 <!-- test orthofinder -b -og --> | |
221 <test> | |
222 <conditional name="init"> | |
223 <param name="start" value="blast" /> | |
224 <param name="input_blast_out"> | |
225 <collection type="list"> | |
226 <element name="Blast0_0.txt" value="inputs/blastout/Blast0_0.txt"/> | |
227 <element name="Blast0_1.txt" value="inputs/blastout/Blast0_1.txt"/> | |
228 <element name="Blast0_2.txt" value="inputs/blastout/Blast0_2.txt"/> | |
229 <element name="Blast0_3.txt" value="inputs/blastout/Blast0_3.txt"/> | |
230 <element name="Blast1_0.txt" value="inputs/blastout/Blast1_0.txt"/> | |
231 <element name="Blast1_1.txt" value="inputs/blastout/Blast1_1.txt"/> | |
232 <element name="Blast1_2.txt" value="inputs/blastout/Blast1_2.txt"/> | |
233 <element name="Blast1_3.txt" value="inputs/blastout/Blast1_3.txt"/> | |
234 <element name="Blast2_0.txt" value="inputs/blastout/Blast2_0.txt"/> | |
235 <element name="Blast2_1.txt" value="inputs/blastout/Blast2_1.txt"/> | |
236 <element name="Blast2_2.txt" value="inputs/blastout/Blast2_2.txt"/> | |
237 <element name="Blast2_3.txt" value="inputs/blastout/Blast2_3.txt"/> | |
238 <element name="Blast3_0.txt" value="inputs/blastout/Blast3_0.txt"/> | |
239 <element name="Blast3_1.txt" value="inputs/blastout/Blast3_1.txt"/> | |
240 <element name="Blast3_2.txt" value="inputs/blastout/Blast3_2.txt"/> | |
241 <element name="Blast3_3.txt" value="inputs/blastout/Blast3_3.txt"/> | |
242 </collection> | |
243 </param> | |
244 <param name="input_blast_fa"> | |
245 <collection type="list"> | |
246 <element name="Species0.fa" value="inputs/blastfa/Species0.fa"/> | |
247 <element name="Species1.fa" value="inputs/blastfa/Species1.fa"/> | |
248 <element name="Species2.fa" value="inputs/blastfa/Species2.fa"/> | |
249 <element name="Species3.fa" value="inputs/blastfa/Species3.fa"/> | |
250 </collection> | |
251 </param> | |
252 <param name="specIDs" ftype="txt" value="inputs/blastids/SpeciesIDs.txt"/> | |
253 <param name="seqIDs" ftype="txt" value="inputs/blastids/SequenceIDs.txt"/> | |
254 </conditional> | |
255 <param name="I" value="1.5" /> | |
256 <output name="orthogroups1" value="results_fromblast/Orthogroups.txt"/> | |
257 <output name="orthogroups2" value="results_fromblast/Orthogroups.csv"/> | |
258 <output name="specs_overlap" value="results_fromblast/Orthogroups_SpeciesOverlaps.csv"/> | |
259 <output name="unassigned_genes" value="results_fromblast/Orthogroups_UnassignedGenes.csv"/> | |
260 <output name="stat_overall" value="results_fromblast/Statistics_Overall.csv" lines_diff="2"/> | |
261 <output name="stat_specs" value="results_fromblast/Statistics_PerSpecies.csv"/> | |
262 </test> | |
263 </tests> | |
264 <help> | |
265 ====================== | |
266 OrthoFinder OnlyGroups | |
267 ====================== | |
268 | |
269 Full readme at https://github.com/davidemms/OrthoFinder/blob/master/README.md | |
270 Summary sketch at https://github.com/davidemms/OrthoFinder/blob/master/OrthoFinder-options.pdf | |
271 | |
272 OrthoFinder is a fast, accurate and comprehensive analysis tool for comparative genomics. It finds orthologues and orthogroups, infers gene trees for all orthogroups and infers a rooted species tree for the species being analysed. OrthoFinder also provides comprehensive statistics for comparative genomic analyses. OrthoFinder is simple to use and all you need to run it is a set of protein sequence files (one per species) in FASTA format (Emms, D.M. and Kelly, S., 2015). | |
273 | |
274 .. class:: infomark | |
275 | |
276 This Galaxy tool implements the first part of the OrthoFinder program, i.e. the clustering of genes into orthogroups. | |
277 | |
278 If you have already run OrthoFinder, the tool allows you to re-run the analysis from the pre-computed blast results. | |
279 | |
280 ----------- | |
281 Input files | |
282 ----------- | |
283 - When using the "from fasta" option (i.e. OrthoFinder from scratch): the input files are a set of proteomes in fasta format (one file per species). Choose this option if you have no OrthoFinder results yet. | |
284 - When using the "from blast results" option: the input files are all the following files from a previous OrthoFinder run (these files appear only if you have chosen to keep them while launching a previous run): | |
285 - A dataset collection / multiple datasets for the blast outputs | |
286 - A dataset collection / multiple datasets for .fa files | |
287 - The SpeciesIDs.txt file | |
288 - The SequenceIDs.txt file | |
289 | |
290 </help> | |
291 <citations> | |
292 <citation type="doi">10.1186/s13059-015-0721-2</citation> | |
293 </citations> | |
294 </tool> |