<tool id="plant_tribes_gene_family_aligner" name="GeneFamilyAligner" version="@WRAPPER_VERSION@.3.0">
    <!--
        Galaxy wrapper around the GeneFamilyAligner executable.
        It takes a list collection of orthogroup fasta files, runs the aligner
        with MAFFT or PASTA, and exposes the sub-directories of
        geneFamilyAlignments_dir as output collections.
    -->
    <description>aligns integrated orthologous gene family clusters</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="1.0.3">plant_tribes_gene_family_aligner</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        #set alignment_method = $alignment_method_cond.alignment_method

        ## Stage every element of the input collection as a symlink inside a
        ## scratch directory; that directory is what --orthogroup_faa receives.
        ## Paths are single-quoted so names with whitespace stay one argument.
        #set input_dir = 'input_dir'
        mkdir '$input_dir' &&
        #for $i in $input:
            #set filename = $i.file_name
            #set name = $i.name
            ln -s '$filename' '$input_dir/$name' &&
        #end for

        ## Point the PASTA environment variables at the directory that holds
        ## the GeneFamilyAligner executable.
        export TOOLS_DIR=\$(dirname `which GeneFamilyAligner`) &&
        export PASTA_TOOLS_RUNDIR=\$TOOLS_DIR &&
        export PASTA_TOOLS_DEVDIR=\$TOOLS_DIR &&

        GeneFamilyAligner
        --alignment_method $alignment_method
        #if str($alignment_method) == 'pasta':
            --pasta_script_path '$__tool_directory__/run_pasta.py'
            --pasta_iter_limit $alignment_method_cond.pasta_iter_limit
        #end if
        --num_threads \${GALAXY_SLOTS:-4}
        --orthogroup_faa '$input_dir'
        #if str($codon_alignments) == 'yes':
            --codon_alignments
        #end if
        #set remove_gappy_sequences = $remove_gappy_sequences_cond.remove_gappy_sequences
        #if str($remove_gappy_sequences) == 'yes':
            #set trim_type_cond = $remove_gappy_sequences_cond.trim_type_cond
            #set trim_type = $trim_type_cond.trim_type
            #if str($trim_type) == 'gap_trimming' and str($trim_type_cond.gap_trimming):
                --gap_trimming $trim_type_cond.gap_trimming
            #else:
                ## Reached for 'automated_trimming', and also for 'gap_trimming'
                ## when the optional gap score was left empty.
                --automated_trimming
            #end if
            #set remove_sequences_with_gaps_cond = $remove_gappy_sequences_cond.remove_sequences_with_gaps_cond
            #set remove_sequences_with_gaps = $remove_sequences_with_gaps_cond.remove_sequences_with_gaps
            #if str($remove_sequences_with_gaps) == 'yes':
                ## Both values are optional; each flag is only emitted when set.
                #if str($remove_sequences_with_gaps_cond.remove_sequences_with_gaps_of):
                    --remove_sequences $remove_sequences_with_gaps_cond.remove_sequences_with_gaps_of
                #end if
                #if str($remove_sequences_with_gaps_cond.iterative_realignment):
                    --iterative_realignment $remove_sequences_with_gaps_cond.iterative_realignment
                #end if
            #end if
        #end if
        &>gene_family_aligner_log.txt;

        ## On failure, copy the captured log into the output directory tree
        ## before exiting non-zero. -maxdepth is a global find option and is
        ## therefore placed ahead of the -type test.
        if [[ $? -ne 0 ]]; then
            find geneFamilyAlignments_dir -maxdepth 1 -type d -exec cp gene_family_aligner_log.txt {} \;
            exit 1;
        fi
    ]]></command>
    <inputs>
        <param name="input" format="fasta" type="data_collection" collection_type="list" label="Integrated orthogroup fasta files" />
        <conditional name="alignment_method_cond">
            <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method">
                <option value="mafft" selected="true">MAFFT</option>
                <option value="pasta">PASTA</option>
            </param>
            <when value="mafft" />
            <when value="pasta">
                <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" />
            </when>
        </conditional>
        <param name="codon_alignments" type="select" label="Codon alignments">
            <option value="no" selected="true">No</option>
            <option value="yes">Yes</option>
        </param>
        <!-- Post-processing options are only offered when this conditional is set to 'yes'. -->
        <conditional name="remove_gappy_sequences_cond">
            <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration">
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no" />
            <when value="yes">
                <conditional name="trim_type_cond">
                    <param name="trim_type" type="select" label="Trimming method">
                        <option value="gap_trimming" selected="true">Gap score based trimming</option>
                        <option value="automated_trimming">Automated heuristic trimming</option>
                    </param>
                    <when value="gap_trimming">
                        <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" />
                    </when>
                    <when value="automated_trimming" />
                </conditional>
                <conditional name="remove_sequences_with_gaps_cond">
                    <param name="remove_sequences_with_gaps" type="select" label="Remove sequences">
                        <option value="no" selected="true">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="no" />
                    <when value="yes">
                        <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" />
                        <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" />
                    </when>
                </conditional>
                <param name="output_pristine_alignments" type="select" display="radio" label="Output primary and intermediate alignments?" help="In addition to trimmed/filtered alignments">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!--
            Each collection is discovered from one sub-directory of
            geneFamilyAlignments_dir and is only created when its filter
            expression evaluates to true for the selected parameters.
        -->
        <collection name="primary_faa" type="list" label="${tool.name} (primary orthogroup protein alignments) on ${on_string}">
            <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_aln_faa" format="fasta" />
            <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter>
        </collection>
        <collection name="primary_fna" type="list" label="${tool.name} (primary orthogroup codon alignments) on ${on_string}">
            <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_aln_fna" format="fasta" />
            <filter>codon_alignments == 'yes' and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter>
        </collection>
        <collection name="pristine" type="list" label="${tool.name} (intermediate alignments) on ${on_string}">
            <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/other_orthogroups_aln" format="fasta" />
            <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['output_pristine_alignments'] == 'yes'</filter>
        </collection>
        <collection name="trimmed_faa" type="list" label="${tool.name} (trimmed orthogroup protein alignments) on ${on_string}">
            <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_trimmed_aln_faa" format="fasta" />
            <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no'</filter>
        </collection>
        <collection name="trimmed_fna" type="list" label="${tool.name} (trimmed orthogroup codon alignments) on ${on_string}">
            <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_trimmed_aln_fna" format="fasta" />
            <filter>codon_alignments == 'yes' and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no'</filter>
        </collection>
        <collection name="filtered_faa" type="list" label="${tool.name} (filtered orthogroup protein alignments) on ${on_string}">
            <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_filtered_aln_faa" format="fasta" />
            <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes'</filter>
        </collection>
        <collection name="filtered_fna" type="list" label="${tool.name} (filtered orthogroup codon alignments) on ${on_string}">
            <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_filtered_aln_fna" format="fasta" />
            <filter>codon_alignments == 'yes' and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes'</filter>
        </collection>
    </outputs>
    <tests>
        <!-- Default MAFFT run with codon alignments enabled and no post-processing. -->
        <test>
            <param name="input">
                <collection type="list">
                    <element name="3722.faa" value="3722.faa"/>
                    <element name="3722.fna" value="3722.fna"/>
                    <element name="38889.faa" value="38889.faa"/>
                    <element name="38889.fna" value="38889.fna"/>
                    <element name="39614.faa" value="39614.faa"/>
                    <element name="39614.fna" value="39614.fna"/>
                </collection>
            </param>
            <param name="codon_alignments" value="yes"/>
            <output_collection name="primary_faa" type="list">
                <element name="3722.faa.aln" file="3722.faa.aln" ftype="fasta"/>
                <element name="38889.faa.aln" file="38889.faa.aln" ftype="fasta"/>
                <element name="39614.faa.aln" file="39614.faa.aln" ftype="fasta"/>
            </output_collection>
            <output_collection name="primary_fna" type="list">
                <element name="3722.fna.aln" file="3722.fna.aln" ftype="fasta"/>
                <element name="38889.fna.aln" file="38889.fna.aln" ftype="fasta"/>
                <element name="39614.fna.aln" file="39614.fna.aln" ftype="fasta"/>
            </output_collection>
        </test>
    </tests>
    <help>
This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
analyses of genome-scale gene families and transcriptomes. This tool estimates protein and codon multiple sequence alignments
of integrated orthologous gene family fasta files produced by the GeneFamilyIntegrator tool.

-----

**Required options**

* **Integrated orthogroup fasta files** - orthogroup fasta files produced by the GeneFamilyIntegrator tool selected from your history. Depending on how the GeneFamilyClassifier tool was executed, these could either be proteins or proteins and their corresponding coding sequences.

* **Multiple sequence alignment method** - method for estimating orthogroup multiple sequence alignments. PlantTribes estimates alignments using either MAFFT's L-INS-i algorithm or the divide and conquer approach implemented in the PASTA pipeline for large alignments.

  - **PASTA iteration limit** - number of PASTA iterations. By default, PASTA performs 3 iterations.

* **Codon alignments** - select 'Yes' to create codon multiple sequence alignments. This option requires both protein and their corresponding coding sequence orthogroup fasta files to be present in the GeneFamilyAligner input data that was produced by the GeneFamilyIntegrator.

**Other options**

* **Alignment post-processing configuration** - select 'Yes' to enable multiple sequence alignment post-processing configuration options.

  - **Trimming method** - multiple sequence alignment trimming method. PlantTribes trims alignments using two automated approaches implemented in trimAl. Gap score based trimming removes alignment sites that do not achieve a user specified gap score. For example, a setting of 0.1 removes sites that have gaps in 90% or more of the sequences in the multiple sequence alignment. The automated heuristic trimming approach determines the best automated trimAl method to trim a given alignment as described in the trimAl tutorial `trimAl`_.

    - **Gap score** - the fraction of sequences with gaps allowed in an alignment site. The score is restricted to the range 0.0 - 1.0. Zero value has no effect.

  - **Remove sequences** - select 'Yes' to remove sequences in multiple sequence alignments that do not achieve a user specified alignment coverage score. For example, a setting of 0.7 removes sequences with more than 30% gaps in the alignment. This option requires one of the trimming methods to be set.

    - **Coverage score** - minimum fraction of sites without gaps for a sequence in a multiple sequence alignment. The score is restricted to the range 0.0 - 1.0. Zero value has no effect.

    - **Realignment iteration limit** - number of iterations to perform trimming, removal of sequences, and realignment of orthogroup sequences. Zero value has no effect.

  - **Output primary and intermediate alignments** - selecting 'Yes' will produce a dataset collection of primary and intermediate alignments, the elements of which can be viewed with visualization tools, in addition to the final trimmed and/or filtered alignments dataset collection.

.. _trimAl: http://trimal.cgenomics.org

    </help>
    <citations>
        <expand macro="citation1" />
        <citation type="bibtex">
            @article{Wall2008,
            journal = {Nucleic Acids Research},
            author = {Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS},
            title = {PlantTribes: a gene and gene family resource for comparative genomics in plants},
            year = {2008},
            volume = {36},
            number = {suppl 1},
            pages = {D970-D976},}
        </citation>
        <citation type="bibtex">
            @article{Katoh2013,
            journal = {Molecular biology and evolution},
            author = {Katoh K, Standley DM},
            title = {MAFFT multiple sequence alignment software version 7: improvements in performance and usability},
            year = {2013},
            volume = {30},
            number = {4},
            pages = {772-780},}
        </citation>
        <citation type="bibtex">
            @article{Mirarab2014,
            journal = {Research in Computational Molecular Biology (RECOMB)},
            author = {Mirarab S, Nguyen N, Warnow T},
            title = {PASTA: Ultra-Large Multiple Sequence Alignment. In R. Sharan (Ed.)},
            year = {2014},
            pages = {177-191},
            url = {https://github.com/smirarab/pasta},}
        </citation>
        <citation type="bibtex">
            @article{Capella-Gutierrez2009,
            journal = {Bioinformatics},
            author = {Capella-Gutierrez S, Silla-Martínez JM, Gabaldón T},
            title = {trimAl: a tool for automated alignment trimming in large-scale phylogenetic analyses},
            year = {2009},
            volume = {25},
            number = {15},
            pages = {1972-1973},}
        </citation>
        <citation type="bibtex">
            @article{Yachdav2016,
            journal = {Bioinformatics},
            author = {Yachdav G, Wilzbach S, Rauscher B, Sheridan R, Sillitoe I, Procter J, Lewis SE, Rost B, Goldberg T},
            title = {MSAViewer: interactive JavaScript visualization of multiple sequence alignments},
            year = {2016},
            volume = {32},
            number = {22},
            pages = {3501-3503},}
        </citation>
        <citation type="bibtex">
            @article{Clamp2004,
            journal = {Bioinformatics},
            author = {Clamp M, Cuff J, Searle SM, Barton GJ},
            title = {The jalview java alignment editor},
            year = {2004},
            volume = {20},
            number = {3},
            pages = {426-427},}
        </citation>
    </citations>
</tool>