<tool id="plant_tribes_gene_family_classifier"
      name="GeneFamilyClassifier"
      version="@WRAPPER_VERSION@.3.0">
<!-- Short description of what the tool does. -->
<description>classifies gene sequences into pre-computed orthologous gene family clusters</description>
<!--
    Shared definitions are imported from macros.xml. The @WRAPPER_VERSION@ token and the
    param_scaffold / citation1 macros used in this file are not defined here; presumably
    they come from that import (confirm against macros.xml).
-->
<macros>
    <import>macros.xml</import>
</macros>
<!-- Package that must be available when the command runs. -->
<requirements>
    <requirement type="package" version="1.0.3">plant_tribes_gene_family_classifier</requirement>
</requirements>
<command detect_errors="exit_code"><![CDATA[
    ## Part 1: flatten the nested "advanced" conditionals into simple flags.
    ## Every flag is given a value on every path (including basic mode), so
    ## the argument-building section below can test each one the same way.
    #if str($options_type.options_type_selector) == 'advanced':
        #set specify_super_orthogroups_cond = $options_type.specify_super_orthogroups_cond
        #set specify_super_orthogroups = $specify_super_orthogroups_cond.specify_super_orthogroups
        #set create_orthogroup_cond = $options_type.create_orthogroup_cond
        #set create_orthogroup = $create_orthogroup_cond.create_orthogroup
        #set specify_single_copy_cond = $options_type.specify_single_copy_cond
        #set specify_single_copy = $specify_single_copy_cond.specify_single_copy
        #if str($specify_super_orthogroups) == 'yes':
            #set specify_super_orthos = True
            #set super_orthogroups = $specify_super_orthogroups_cond.super_orthogroups
        #else:
            #set specify_super_orthos = False
        #end if
        #if str($create_orthogroup) == 'yes':
            #set create_ortho_sequences = True
            #set create_corresponding_coding_sequences_cond = $create_orthogroup_cond.create_corresponding_coding_sequences_cond
            #if str($create_corresponding_coding_sequences_cond.create_corresponding_coding_sequences) == 'yes':
                #set create_corresponding_coding_sequences = True
            #else:
                #set create_corresponding_coding_sequences = False
            #end if
        #else:
            #set create_ortho_sequences = False
            #set create_corresponding_coding_sequences = False
        #end if
        #if str($specify_single_copy) == 'yes':
            #set single_copy_orthogroup = True
            #set single_copy_cond = $specify_single_copy_cond.single_copy_cond
            #set single_copy = $single_copy_cond.single_copy
        #else:
            #set single_copy_orthogroup = False
        #end if
    #else:
        #set specify_super_orthos = False
        #set single_copy_orthogroup = False
        #set create_ortho_sequences = False
        #set create_corresponding_coding_sequences = False
    #end if

    ## Part 2: arguments that are always passed.
    python '$__tool_directory__/gene_family_classifier.py'
    --input '$input'
    --scaffold '$scaffold.fields.path'
    --method $method
    --classifier $save_hmmscan_log_cond.classifier
    --config_dir '$scaffold.fields.path'
    --num_threads \${GALAXY_SLOTS:-4}

    ## Part 3: arguments that only exist in advanced mode.
    #if str($options_type.options_type_selector) == 'advanced':
        #if $specify_super_orthos:
            --super_orthogroups $super_orthogroups
        #end if
        #if $single_copy_orthogroup:
            #if str($single_copy) == 'custom':
                #set single_copy_custom_cond = $single_copy_cond.single_copy_custom_cond
                #set single_copy_custom = $single_copy_custom_cond.single_copy_custom
                #if str($single_copy_custom) == 'no':
                    --single_copy_custom default
                #else:
                    --single_copy_custom '$single_copy_custom_cond.single_copy_custom_config'
                #end if
            #else:
                ## Both integers are optional; an unset value renders as an
                ## empty string and the argument is left out.
                #if str($single_copy_cond.single_copy_taxa):
                    --single_copy_taxa $single_copy_cond.single_copy_taxa
                #end if
                #if str($single_copy_cond.taxa_present):
                    --taxa_present $single_copy_cond.taxa_present
                #end if
            #end if
        #end if
        #if $create_ortho_sequences:
            --orthogroup_fasta true
            #if $create_corresponding_coding_sequences:
                --coding_sequences '$create_corresponding_coding_sequences_cond.coding_sequences'
            #end if
        #end if
    #end if

    ## Part 4: hmmscan log. save_hmmscan_log is only defined for the hmmscan
    ## and both classifiers, so the classifier test has to come first.
    #if (str($save_hmmscan_log_cond.classifier) == 'hmmscan' or str($save_hmmscan_log_cond.classifier) == 'both') and str($save_hmmscan_log_cond.save_hmmscan_log) == 'yes':
        --save_hmmscan_log true
        --hmmscan_log '$output_hmmscan_log'
    #end if
]]></command>
<inputs>
    <!-- Protein sequences to classify. -->
    <param name="input" type="data" format="fasta" label="Proteins fasta file"/>
    <!-- Scaffold selector; expanded from a macro that is not defined in this file. -->
    <expand macro="param_scaffold"/>
    <param name="method" type="select" label="Protein clustering method">
        <option value="gfam" selected="true">GFam</option>
        <option value="orthofinder">OrthoFinder</option>
        <option value="orthomcl">OrthoMCL</option>
    </param>
    <!-- The "save log" question is only offered when hmmscan takes part in the classification. -->
    <conditional name="save_hmmscan_log_cond">
        <param name="classifier" type="select" label="Protein classifier">
            <option value="blastp" selected="true">blastp</option>
            <option value="hmmscan">hmmscan</option>
            <option value="both">Both blastp and hmmscan</option>
        </param>
        <when value="blastp"/>
        <when value="hmmscan">
            <param name="save_hmmscan_log" type="select" label="Save hmmscan log?">
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
        </when>
        <when value="both">
            <param name="save_hmmscan_log" type="select" label="Save hmmscan log?">
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
        </when>
    </conditional>
    <!-- Basic mode exposes nothing further; advanced mode exposes three independent option groups. -->
    <conditional name="options_type">
        <param name="options_type_selector" type="select" label="Options configuration">
            <option value="basic" selected="true">Basic</option>
            <option value="advanced">Advanced</option>
        </param>
        <when value="basic"/>
        <when value="advanced">
            <!-- Group 1: super orthogroups. -->
            <conditional name="specify_super_orthogroups_cond">
                <param name="specify_super_orthogroups" type="select" label="Super orthogroups configuration">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <param name="super_orthogroups" type="select" label="Clustering distance measure">
                        <option value="min_evalue" selected="true">minimum e-value</option>
                        <option value="avg_evalue">average e-value</option>
                    </param>
                </when>
            </conditional>
            <!-- Group 2: single copy orthogroup selection, either by global thresholds or by a custom configuration. -->
            <conditional name="specify_single_copy_cond">
                <param name="specify_single_copy" type="select" label="Single copy orthogroups configuration">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <conditional name="single_copy_cond">
                        <param name="single_copy" type="select" label="Selection criterion">
                            <option value="taxa" selected="true">Global selection</option>
                            <option value="custom">Custom selection</option>
                        </param>
                        <when value="custom">
                            <conditional name="single_copy_custom_cond">
                                <param name="single_copy_custom" type="select" label="Custom selection configuration">
                                    <option value="no" selected="true">No</option>
                                    <option value="yes">Yes</option>
                                </param>
                                <when value="no"/>
                                <when value="yes">
                                    <param name="single_copy_custom_config" type="data" format="txt" label="Custom selection file"/>
                                </when>
                            </conditional>
                        </when>
                        <when value="taxa">
                            <!-- Both thresholds are optional and may be left empty. -->
                            <param name="single_copy_taxa" type="integer" optional="true" min="0" label="Minimum single copy taxa"/>
                            <param name="taxa_present" type="integer" optional="true" min="0" label="Minimum taxa present"/>
                        </when>
                    </conditional>
                </when>
            </conditional>
            <!-- Group 3: orthogroup fasta files, optionally with the matching coding sequences. -->
            <conditional name="create_orthogroup_cond">
                <param name="create_orthogroup" type="select" label="Orthogroups fasta configuration">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <conditional name="create_corresponding_coding_sequences_cond">
                        <param name="create_corresponding_coding_sequences" type="select" label="Orthogroups coding sequences">
                            <option value="no" selected="true">No</option>
                            <option value="yes">Yes</option>
                        </param>
                        <when value="no"/>
                        <when value="yes">
                            <param name="coding_sequences" type="data" format="fasta" label="Coding sequences fasta file"/>
                        </when>
                    </conditional>
                </when>
            </conditional>
        </when>
    </conditional>
</inputs>
<outputs>
    <!-- hmmscan log: present only when hmmscan is one of the classifiers and the user chose to save the log. -->
    <data name="output_hmmscan_log" format="txt" label="${tool.name} (hmmscan.log) on ${on_string}">
        <filter>save_hmmscan_log_cond['classifier'] in ['hmmscan', 'both'] and save_hmmscan_log_cond['save_hmmscan_log'] == 'yes'</filter>
    </data>
    <!-- Classification tables discovered in geneFamilyClassification_dir; this collection has no filter. -->
    <collection name="output_orthos" type="list" label="${tool.name} on ${on_string}">
        <discover_datasets pattern="__name__" directory="geneFamilyClassification_dir" ext="tabular" visible="false"/>
    </collection>
    <!-- Orthogroup fasta files: advanced mode with orthogroup fasta creation enabled. -->
    <collection name="output_orthogroups_fasta" type="list" label="${tool.name} (gene family clusters) on ${on_string}">
        <discover_datasets pattern="__name__" directory="output_orthogroups_fasta_dir" ext="fasta" visible="false"/>
        <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes'</filter>
    </collection>
    <!-- Single copy orthogroup fasta files: additionally requires the single copy configuration to be enabled. -->
    <collection name="output_single_copy_fasta" type="list" label="${tool.name} (single copy orthogroups) on ${on_string}">
        <discover_datasets pattern="__name__" directory="output_single_copy_fasta_dir" ext="fasta" visible="false"/>
        <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes' and options_type['specify_single_copy_cond']['specify_single_copy'] == 'yes'</filter>
    </collection>
</outputs>
<tests>
    <!--
        Advanced run using both classifiers on the 22Gv1.1 scaffold with OrthoMCL clustering,
        with orthogroup fasta and coding sequence output enabled.
    -->
    <test>
        <param name="input" value="transcripts.cleaned.nr.pep" ftype="fasta"/>
        <param name="scaffold" value="22Gv1.1"/>
        <param name="method" value="orthomcl"/>
        <param name="classifier" value="both"/>
        <param name="options_type_selector" value="advanced"/>
        <param name="create_orthogroup" value="yes"/>
        <param name="create_corresponding_coding_sequences" value="yes"/>
        <param name="coding_sequences" value="transcripts.cleaned.nr.cds" ftype="fasta"/>
        <!-- Classification tables, compared with "contains" instead of the default comparison. -->
        <output_collection name="output_orthos" type="list">
            <element name="proteins.blastp.22Gv1.1" file="proteins.blastp.22Gv1.1" ftype="tabular" compare="contains"/>
            <element name="proteins.blastp.22Gv1.1.bestOrthos" file="proteins.blastp.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/>
            <element name="proteins.both.22Gv1.1.bestOrthos" file="proteins.both.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/>
            <element name="proteins.both.22Gv1.1.bestOrthos.summary" file="proteins.both.22Gv1.1.bestOrthos.summary" ftype="tabular" compare="contains"/>
            <element name="proteins.hmmscan.22Gv1.1" file="proteins.hmmscan.22Gv1.1" ftype="tabular" compare="contains"/>
            <element name="proteins.hmmscan.22Gv1.1.bestOrthos" file="proteins.hmmscan.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/>
        </output_collection>
        <!-- One protein (.faa) and one coding sequence (.fna) file is expected per orthogroup. -->
        <output_collection name="output_orthogroups_fasta" type="list">
            <element name="20.faa" file="20.faa" ftype="fasta"/>
            <element name="20.fna" file="20.fna" ftype="fasta"/>
            <element name="3494.faa" file="3494.faa" ftype="fasta"/>
            <element name="3494.fna" file="3494.fna" ftype="fasta"/>
            <element name="3722.faa" file="3722.faa" ftype="fasta"/>
            <element name="3722.fna" file="3722.fna" ftype="fasta"/>
            <element name="38889.faa" file="38889.faa" ftype="fasta"/>
            <element name="38889.fna" file="38889.fna" ftype="fasta"/>
            <element name="39614.faa" file="39614.faa" ftype="fasta"/>
            <element name="39614.fna" file="39614.fna" ftype="fasta"/>
            <element name="5235.faa" file="5235.faa" ftype="fasta"/>
            <element name="5235.fna" file="5235.fna" ftype="fasta"/>
        </output_collection>
    </test>
</tests>
<help>
This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
analyses of genome-scale gene families and transcriptomes. This tool classifies gene coding sequences either produced by
the AssemblyPostProcessor tool or from an external source into pre-computed orthologous gene family clusters (orthogroups)
of a PlantTribes scaffold. Classified sequences are then assigned with the corresponding orthogroups’ metadata that includes
gene counts of backbone taxa, super clusters (super orthogroups) at multiple stringencies, and functional annotations from
sources such as Gene Ontology (GO), InterPro protein domains, TAIR, UniProtKB/TrEMBL, and UniProtKB/Swiss-Prot. Additionally,
sequences belonging to single/low-copy gene families that are mainly utilized in species tree inference can be determined.

-----

**Required options**

* **Proteins fasta file** - proteins fasta file either produced by the AssemblyPostProcessor tool or an external source selected from your history.
* **Gene family scaffold** - one of the PlantTribes gene family scaffolds [2-4] installed into Galaxy by the PlantTribes Scaffold Data Manager tool.
* **Protein clustering method** - gene family scaffold protein clustering method as described in the AssemblyPostProcessor tool.
* **Protein classifier** - classifier used to assign protein sequences to the orthogroups of the specified scaffold. PlantTribes implements three classification approaches: blastp (faster)[5], hmmscan (slower but more sensitive assignment of divergent homologs)[6], and both blastp and hmmscan (disagreements resolved in favor of hmmscan; more exhaustive).

**Other options**

* **Super orthogroups configuration** - select ‘Yes’ to enable super orthogroups configuration options. Super orthogroups[7] are constructed through a second iteration of MCL clustering to connect distant, but potentially related orthogroup clusters.

* **Clustering distance measure** - distance measure used in merging orthogroup clusters into super orthogroup clusters. PlantTribes pre-computed super orthogroups are based on the minimum and average blastp e-value between all pairs of scaffold orthogroups used as the input matrix for the MCL clustering algorithm[8].

* **Single copy orthogroups configuration** - select ‘Yes’ to enable single/low-copy orthogroups selection configuration options.

* **Selection criterion** - single/low-copy orthogroups selection criterion. PlantTribes provides custom and global selection criteria for selecting user-defined single/low-copy scaffold orthogroups.

* **Global selection configuration** - the upper limit values of the following two parameters vary depending on the selected gene family scaffold, and the tool will produce an error if the value exceeds the number of species in the circumscribed scaffold.

* **Minimum single copy taxa** - minimum number of taxa with single copy genes in the orthogroup.
* **Minimum taxa present** - minimum number of taxa present in the orthogroup.

* **Custom selection configuration** - select ‘Yes’ to enable selection of a single copy configuration file. Scaffold configuration templates (.singleCopy.config) of how to customize single/low-copy orthogroups selection can be found in the scaffold data installed into Galaxy via the PlantTribes Scaffolds Download Data Manager tool, and also available at the PlantTribes GitHub `repository`_. Single/low-copy settings shown in these templates are used as defaults if ‘No’ is selected.

* **Custom selection file** - select a single/low-copy customized configuration file from your history.

* **Orthogroups fasta configuration** - select ‘Yes’ to create proteins orthogroups fasta files for the classified sequences.

* **Orthogroups coding sequences** - select ‘Yes’ to create corresponding coding sequences orthogroup fasta files for the classified protein sequences. Requires coding sequences fasta file corresponding to the proteins fasta file to be selected from your history.

* **Coding sequences fasta file** - select coding sequences fasta file corresponding to the proteins fasta file from your history.

.. _repository: https://github.com/dePamphilis/PlantTribes/tree/master/config

</help>
<citations>
    <!--
        The first citation is expanded from a macro that is not defined in this file.
        The numeric prefixes in the author fields below (2. to 8.) appear to mirror the
        bracketed reference numbers used in the help text; keep the two in step.
        Every BibTeX field must be followed by a comma, otherwise the entry cannot be parsed.
    -->
    <expand macro="citation1"/>
    <citation type="bibtex">
        @article{Sasidharan2012,
            journal = {Nucleic Acids Research},
            author = {2. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A},
            title = {GFam: a platform for automatic annotation of gene families},
            year = {2012},
            pages = {gks631},}
    </citation>
    <citation type="bibtex">
        @article{Li2003,
            journal = {Genome Research},
            author = {3. Li L, Stoeckert CJ, Roos DS},
            title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes},
            year = {2003},
            volume = {13},
            number = {9},
            pages = {2178-2189},}
    </citation>
    <citation type="bibtex">
        @article{Emms2015,
            journal = {Genome Biology},
            author = {4. Emms DM, Kelly S},
            title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy},
            year = {2015},
            volume = {16},
            number = {1},
            pages = {157},}
    </citation>
    <citation type="bibtex">
        @article{Altschul1990,
            journal = {Journal of molecular biology},
            author = {5. Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ},
            title = {Basic local alignment search tool},
            year = {1990},
            volume = {215},
            number = {3},
            pages = {403-410},}
    </citation>
    <citation type="bibtex">
        @article{Eddy2009,
            journal = {Genome Inform},
            author = {6. Eddy SR},
            title = {A new generation of homology search tools based on probabilistic inference},
            year = {2009},
            volume = {23},
            number = {1},
            pages = {205-211},}
    </citation>
    <citation type="bibtex">
        @article{Wall2008,
            journal = {Nucleic Acids Research},
            author = {7. Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS},
            title = {PlantTribes: a gene and gene family resource for comparative genomics in plants},
            year = {2008},
            volume = {36},
            number = {suppl 1},
            pages = {D970-D976},}
    </citation>
    <citation type="bibtex">
        @article{Enright2002,
            journal = {Nucleic acids research},
            author = {8. Enright AJ, Van Dongen S, Ouzounis CA},
            title = {An efficient algorithm for large-scale detection of protein families},
            year = {2002},
            volume = {30},
            number = {7},
            pages = {1575-1584},}
    </citation>
</citations>
</tool>