comparison gene_family_aligner.xml @ 6:3384b6a842b0 draft

Uploaded
author greg
date Mon, 30 Oct 2017 09:52:00 -0400
parents a73c2e65098e
children 2ac7090847f9
comparison
equal deleted inserted replaced
5:a73c2e65098e 6:3384b6a842b0
1 <tool id="plant_tribes_gene_family_aligner" name="GeneFamilyAligner" version="@WRAPPER_VERSION@.2"> 1 <tool id="plant_tribes_gene_family_aligner" name="GeneFamilyAligner" version="@WRAPPER_VERSION@.3.0">
2 <description>aligns integrated orthologous gene family clusters</description> 2 <description>aligns integrated orthologous gene family clusters</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements_gene_family_aligner" /> 6 <requirements>
7 <requirement type="package" version="1.0.3">plant_tribes_gene_family_aligner</requirement>
8 </requirements>
7 <command detect_errors="exit_code"><![CDATA[ 9 <command detect_errors="exit_code"><![CDATA[
8 #set input_format = $input_format_cond.input_format
9 #set alignment_method_cond = $input_format_cond.alignment_method_cond
10 #set alignment_method = $alignment_method_cond.alignment_method 10 #set alignment_method = $alignment_method_cond.alignment_method
11 #if str($input_format_cond.input_format) == 'ptortho': 11 #set input_dir = 'input_dir'
12 #set output_codon_alignments = False 12 mkdir $input_dir &&
13 #else if str($input_format_cond.input_format) == 'ptorthocs' and str($input_format_cond.codon_alignments ) == 'no': 13 #for $i in $input:
14 #set output_codon_alignments = False 14 #set filename = $i.file_name
15 #else: 15 #set name = $i.name
16 #set output_codon_alignments = True 16 ln -s $filename $input_dir/$name &&
17 #end if 17 #end for
18 18 GeneFamilyAligner
19 python '$__tool_directory__/gene_family_aligner.py'
20 --alignment_method $alignment_method 19 --alignment_method $alignment_method
21 #if str($alignment_method) == 'pasta': 20 #if str($alignment_method) == 'pasta':
22 --pasta_script_path '$__tool_directory__/run_pasta.py' 21 --pasta_script_path '$__tool_directory__/run_pasta.py'
23 --pasta_iter_limit $alignment_method_cond.pasta_iter_limit 22 --pasta_iter_limit $alignment_method_cond.pasta_iter_limit
24 #end if 23 #end if
25 --num_threads \${GALAXY_SLOTS:-4} 24 --num_threads \${GALAXY_SLOTS:-4}
26 #if str($input_format) == 'ptortho': 25 --orthogroup_faa '$input_dir'
27 --orthogroup_faa '$input_format_cond.input_ptortho.extra_files_path' 26 #if str($codon_alignments) == 'yes':
28 #else: 27 --codon_alignments
29 ## str($input_format) == 'ptorthocs'
30 --orthogroup_faa '$input_format_cond.input_ptorthocs.extra_files_path'
31 #if str($input_format_cond.codon_alignments) == 'yes':
32 --codon_alignments true
33 #end if
34 #end if 28 #end if
35 #set remove_gappy_sequences = $remove_gappy_sequences_cond.remove_gappy_sequences 29 #set remove_gappy_sequences = $remove_gappy_sequences_cond.remove_gappy_sequences
36 #if str($remove_gappy_sequences) == 'yes': 30 #if str($remove_gappy_sequences) == 'yes':
37 #set trim_type_cond = $remove_gappy_sequences_cond.trim_type_cond 31 #set trim_type_cond = $remove_gappy_sequences_cond.trim_type_cond
38 #set trim_type = $trim_type_cond.trim_type 32 #set trim_type = $trim_type_cond.trim_type
39 #if str($trim_type) == 'gap_trimming' and str($trim_type_cond.gap_trimming): 33 #if str($trim_type) == 'gap_trimming' and str($trim_type_cond.gap_trimming):
40 --gap_trimming $trim_type_cond.gap_trimming 34 --gap_trimming $trim_type_cond.gap_trimming
41 #else: 35 #else:
42 ## str($trim_type) == 'automated_trimming' 36 ## str($trim_type) == 'automated_trimming'
43 --automated_trimming true 37 --automated_trimming
44 #end if 38 #end if
45 #set remove_sequences_with_gaps_cond = $remove_gappy_sequences_cond.remove_sequences_with_gaps_cond 39 #set remove_sequences_with_gaps_cond = $remove_gappy_sequences_cond.remove_sequences_with_gaps_cond
46 #set remove_sequences_with_gaps = $remove_sequences_with_gaps_cond.remove_sequences_with_gaps 40 #set remove_sequences_with_gaps = $remove_sequences_with_gaps_cond.remove_sequences_with_gaps
47 #if str($remove_sequences_with_gaps) == 'yes': 41 #if str($remove_sequences_with_gaps) == 'yes':
48 #if str($remove_sequences_with_gaps_cond.remove_sequences_with_gaps_of): 42 #if str($remove_sequences_with_gaps_cond.remove_sequences_with_gaps_of):
49 --remove_sequences $remove_sequences_with_gaps_cond.remove_sequences_with_gaps_of 43 --remove_sequences $remove_sequences_with_gaps_cond.remove_sequences_with_gaps_of
50 #end if 44 #end if
51 #if str($remove_sequences_with_gaps_cond.iterative_realignment): 45 #if str($remove_sequences_with_gaps_cond.iterative_realignment):
52 --iterative_realignment $remove_sequences_with_gaps_cond.iterative_realignment 46 --iterative_realignment $remove_sequences_with_gaps_cond.iterative_realignment
53 #end if 47 #end if
54 #if $output_codon_alignments:
55 --output '$output_aln_filtered_ca'
56 --output_dir '$output_aln_filtered_ca.files_path'
57 #else:
58 --output '$output_aln_filtered'
59 --output_dir '$output_aln_filtered.files_path'
60 #end if
61 #else:
62 #if $output_codon_alignments:
63 --output '$output_aln_trimmed_ca'
64 --output_dir '$output_aln_trimmed_ca.files_path'
65 #else:
66 --output '$output_aln_trimmed'
67 --output_dir '$output_aln_trimmed.files_path'
68 #end if
69 #end if
70 #else:
71 #if $output_codon_alignments:
72 --output '$output_aln_ca'
73 --output_dir '$output_aln_ca.files_path'
74 #else:
75 --output '$output_aln'
76 --output_dir '$output_aln.files_path'
77 #end if 48 #end if
78 #end if 49 #end if
79 #if str($output_dataset_collection) == 'yes': 50 &>proc.log
80 --output_dataset_collection dataset_collection
81 #end if
82 ]]></command> 51 ]]></command>
83 <inputs> 52 <inputs>
84 <conditional name="input_format_cond"> 53 <param name="input" format="fasta" type="data_collection" collection_type="list" label="Integrated orthogroup fasta files" />
85 <param name="input_format" type="select" label="Classified orthogroup fasta files"> 54 <conditional name="alignment_method_cond">
86 <option value="ptortho">Proteins orthogroup fasta files</option> 55 <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method">
87 <option value="ptorthocs">Protein and coding sequences orthogroup fasta files</option> 56 <option value="mafft" selected="true">MAFFT</option>
57 <option value="pasta">PASTA</option>
88 </param> 58 </param>
89 <when value="ptortho"> 59 <when value="mafft" />
90 <param name="input_ptortho" format="ptortho" type="data" label="Proteins orthogroup fasta files"> 60 <when value="pasta">
91 <validator type="empty_extra_files_path" /> 61 <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" />
92 </param>
93 <expand macro="cond_alignment_method" />
94 </when>
95 <when value="ptorthocs">
96 <param name="input_ptorthocs" format="ptorthocs" type="data" label="Protein and coding sequences orthogroup fasta files">
97 <validator type="empty_extra_files_path" />
98 </param>
99 <expand macro="cond_alignment_method" />
100 <expand macro="param_codon_alignments" />
101 </when> 62 </when>
102 </conditional> 63 </conditional>
103 <expand macro="cond_remove_gappy_sequences" /> 64 <param name="codon_alignments" type="select" label="Codon alignments">
104 <param name="output_dataset_collection" type="select" display="radio" label="Output additional dataset collection of files?">
105 <option value="no" selected="true">No</option> 65 <option value="no" selected="true">No</option>
106 <option value="yes">Yes</option> 66 <option value="yes">Yes</option>
107 </param> 67 </param>
68 <conditional name="remove_gappy_sequences_cond">
69 <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration">
70 <option value="no" selected="true">No</option>
71 <option value="yes">Yes</option>
72 </param>
73 <when value="no" />
74 <when value="yes">
75 <conditional name="trim_type_cond">
76 <param name="trim_type" type="select" label="Trimming method">
77 <option value="gap_trimming" selected="true">Gap score based trimming</option>
78 <option value="automated_trimming">Automated heuristic trimming</option>
79 </param>
80 <when value="gap_trimming">
81 <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" />
82 </when>
83 <when value="automated_trimming" />
84 </conditional>
85 <conditional name="remove_sequences_with_gaps_cond">
86 <param name="remove_sequences_with_gaps" type="select" label="Remove sequences">
87 <option value="no" selected="true">No</option>
88 <option value="yes">Yes</option>
89 </param>
90 <when value="no" />
91 <when value="yes">
92 <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" />
93 <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" />
94 </when>
95 </conditional>
96 <param name="output_pristine_alignments" type="select" display="radio" label="Output primary and intermediate alignments?" help="In addition to trimmed/filtered alignments">
97 <option value="no" selected="true">No</option>
98 <option value="yes">Yes</option>
99 </param>
100 </when>
101 </conditional>
108 </inputs> 102 </inputs>
109 <outputs> 103 <outputs>
110 <data name="output_aln" format="ptalign" label="${tool.name} (proteins orthogroup alignments) on ${on_string}"> 104 <collection name="primary_faa" type="list" label="${tool.name} (primary orthogroup protein alignments) on ${on_string}">
111 <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no')) and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter> 105 <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_aln_faa" format="fasta" />
112 </data> 106 <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter>
113 <data name="output_aln_ca" format="ptalignca" label="${tool.name} (protein and coding sequences orthogroup alignments) on ${on_string}"> 107 </collection>
114 <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes')) and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter> 108 <collection name="primary_fna" type="list" label="${tool.name} (primary orthogroup codon alignments) on ${on_string}">
115 </data> 109 <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_aln_fna" format="fasta" />
116 <data name="output_aln_filtered" format="ptalignfiltered" label="${tool.name} (filtered proteins orthogroup alignments) on ${on_string}"> 110 <filter>codon_alignments == 'yes' and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter>
117 <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes')</filter> 111 </collection>
118 </data> 112 <collection name="pristine" type="list" label="${tool.name} (intermediate alignments) on ${on_string}">
119 <data name="output_aln_filtered_ca" format="ptalignfilteredca" label="${tool.name} (filtered protein and coding sequences orthogroup alignments) on ${on_string}"> 113 <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/other_orthogroups_aln" format="fasta" />
120 <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes')</filter> 114 <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['output_pristine_alignments'] == 'yes'</filter>
121 </data> 115 </collection>
122 <data name="output_aln_trimmed" format="ptaligntrimmed" label="${tool.name} (trimmed proteins orthogroup alignments) on ${on_string}"> 116 <collection name="trimmed_faa" type="list" label="${tool.name} (trimmed orthogroup protein alignments) on ${on_string}">
123 <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no')</filter> 117 <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_trimmed_aln_faa" format="fasta" />
124 </data> 118 <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no'</filter>
125 <data name="output_aln_trimmed_ca" format="ptaligntrimmedca" label="${tool.name} (trimmed protein and coding sequences orthogroup alignments) on ${on_string}"> 119 </collection>
126 <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no')</filter> 120 <collection name="trimmed_fna" type="list" label="${tool.name} (trimmed orthogroup codon alignments) on ${on_string}">
127 </data> 121 <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_trimmed_aln_fna" format="fasta" />
128 <collection name="dataset_collection" type="list" label="${tool.name} (dataset collection) on ${on_string}"> 122 <filter>codon_alignments == 'yes' and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no'</filter>
129 <discover_datasets pattern="__name__" directory="dataset_collection" format="fasta" /> 123 </collection>
130 <filter>output_dataset_collection == 'yes'</filter> 124 <collection name="filtered_faa" type="list" label="${tool.name} (filtered orthogroup protein alignments) on ${on_string}">
131 </collection> 125 <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_filtered_aln_faa" format="fasta" />
126 <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes'</filter>
127 </collection>
128 <collection name="filtered_fna" type="list" label="${tool.name} (filtered orthogroup codon alignments) on ${on_string}">
129 <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_filtered_aln_fna" format="fasta" />
130 <filter>codon_alignments == 'yes' and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes'</filter>
131 </collection>
132
132 </outputs> 133 </outputs>
133 <tests> 134 <tests>
134 <!-- Test framework does not currently support inputs whose associated extra_files_path contains files to be analyzed.
135 <test> 135 <test>
136 <param name="input">
137 <collection type="list">
138 <element name="3722.faa" value="3722.faa"/>
139 <element name="3722.fna" value="3722.fna"/>
140 <element name="38889.faa" value="38889.faa"/>
141 <element name="38889.fna" value="38889.fna"/>
142 <element name="39614.faa" value="39614.faa"/>
143 <element name="39614.fna" value="39614.fna"/>
144 </collection>
145 </param>
146 <param name="codon_alignments" value="yes"/>
147 <output_collection name="primary_faa" type="list">
148 <element name="3722.faa.aln" file="3722.faa.aln" ftype="fasta"/>
149 <element name="38889.faa.aln" file="38889.faa.aln" ftype="fasta"/>
150 <element name="39614.faa.aln" file="39614.faa.aln" ftype="fasta"/>
151 </output_collection>
152 <output_collection name="primary_fna" type="list">
153 <element name="3722.fna.aln" file="3722.fna.aln" ftype="fasta"/>
154 <element name="38889.fna.aln" file="38889.fna.aln" ftype="fasta"/>
155 <element name="39614.fna.aln" file="39614.fna.aln" ftype="fasta"/>
156 </output_collection>
136 </test> 157 </test>
137 -->
138 </tests> 158 </tests>
139 <help> 159 <help>
140 This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary 160 This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
141 analyses of genome-scale gene families and transcriptomes. This tool estimates protein and codon multiple sequence alignments 161 analyses of genome-scale gene families and transcriptomes. This tool estimates protein and codon multiple sequence alignments
142 of integrated orthologous gene family fasta files produced by the GeneFamilyIntegrator tool. 162 of integrated orthologous gene family fasta files produced by the GeneFamilyIntegrator tool.
143 163
144 ----- 164 -----
145 165
146 **Required options** 166 **Required options**
147 167
148 * **Classified orthogroup fasta files** - orthogroup fasta files produced by the GeneFamilyClassifier tool selected from your history. Depending on how the GeneFamilyClassifier tool was executed, these could either be proteins or proteins and their corresponding coding sequences. 168 * **Integrated orthogroup fasta files** - orthogroup fasta files produced by the GeneFamilyIntegrator tool selected from your history. Depending on how the GeneFamilyClassifier tool was executed, these could either be proteins or proteins and their corresponding coding sequences.
149 169
150 * **Multiple sequence alignment method** - method for estimating orthogroup multiple sequence alignments. PlantTribes estimates alignments using either MAFFT's L-INS-i algorithm or the divide and conquer approach implemented in the PASTA pipeline for large alignments. 170 * **Multiple sequence alignment method** - method for estimating orthogroup multiple sequence alignments. PlantTribes estimates alignments using either MAFFT's L-INS-i algorithm or the divide and conquer approach implemented in the PASTA pipeline for large alignments.
151 171
152 - **PASTA iteration limit** - number of PASTA iterations. By default, PASTA performs 3 iterations. 172 - **PASTA iteration limit** - number of PASTA iterations. By default, PASTA performs 3 iterations.
153 173
154 * **Codon alignments** - select 'Yes' to create codon multiple sequence alignments. This option requires protein and their corresponding coding sequences to be provided as input data. 174 * **Codon alignments** - select 'Yes' to create codon multiple sequence alignments. This option requires both protein and their corresponding coding sequence orthogroup fasta files to be present in the GeneFamilyAligner input data that was produced by the GeneFamilyIntegrator.
155 175
156 **Other options** 176 **Other options**
157 177
158 * **Alignment post-processing configuration** - select 'Yes' to enable multiple sequence alignment post-processing configuration options. 178 * **Alignment post-processing configuration** - select 'Yes' to enable multiple sequence alignment post-processing configuration options.
159 179
165 185
166 - **Coverage score** - minimum fraction of sites without gaps for a sequence in a multiple sequence alignment. The score is restricted to the range 0.0 - 1.0. Zero value has no effect. 186 - **Coverage score** - minimum fraction of sites without gaps for a sequence in a multiple sequence alignment. The score is restricted to the range 0.0 - 1.0. Zero value has no effect.
167 187
168 - **Realignment iteration limit** - number of iterations to perform trimming, removal of sequences, and realignment of orthogroup sequences. Zero value has no effect. 188 - **Realignment iteration limit** - number of iterations to perform trimming, removal of sequences, and realignment of orthogroup sequences. Zero value has no effect.
169 189
170 * **Output additional dataset collection of files** - selecting 'Yes' will produce an additional output dataset collection whose elements are copies of the directories of files (these elements can be viewed with visualization tools). 190 * **Output primary and intermediate alignments** - selecting 'Yes' will produce a dataset collection of primary and intermediate alignments, the elements of which can be viewed with visualization tools, in addition to the final trimmed and/or filtered alignments dataset collection.
171 191
172 .. _trimAl: http://trimal.cgenomics.org 192 .. _trimAl: http://trimal.cgenomics.org
173 193
174 </help> 194 </help>
175 <citations> 195 <citations>