Mercurial > repos > abims-sbr > cds_search
comparison CDS_search.xml @ 0:eb95bf7f90ae draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author | abims-sbr |
---|---|
date | Fri, 01 Feb 2019 10:26:37 -0500 |
parents | |
children | c79bdda8abfb |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:eb95bf7f90ae |
---|---|
1 <tool name="CDS_search" id="cds_search" version="2.1.2"> | |
2 | |
3 <description> | |
4 ORF and CDS search | |
5 </description> | |
6 | |
7 <macros> | |
8 <import>macros.xml</import> | |
9 </macros> | |
10 | |
11 <requirements> | |
12 <expand macro="python_required" /> | |
13 </requirements> | |
14 | |
15 <command><![CDATA[ | |
16 #for $input in $inputs | |
17 ln -s '$input' '$input.element_identifier' && | |
18 echo '$input.element_identifier' >> list_files && | |
19 #end for | |
20 ## Every step is chained with && so that a failing link or script aborts the job. | |
21 ln -s '$__tool_directory__/scripts/dico.py' . && | |
22 ## S01: reads the names collected in list_files; its stdout creates the log dataset. | |
23 python '$__tool_directory__/scripts/S01_find_orf_on_multiple_alignment.py' | |
24 '$__tool_directory__/scripts/code_universel_modified.txt' | |
25 $length.min_length_seq | |
26 $nb_species_keep | |
27 list_files | |
28 > '$log' && | |
29 ## S02: stdout is appended to the same log; methionine is passed as oui/non. | |
30 python '$__tool_directory__/scripts/S02_remove_too_short_bit_or_whole_sequence.py' | |
31 $nb_species_keep | |
32 $methionine | |
33 $length.min_length_seq | |
34 $length.min_length_subseq | |
35 >> '$log' && | |
36 ## S03: stdout is appended to the same log. | |
37 python '$__tool_directory__/scripts/S03_remove_site_with_not_enough_species_represented.py' | |
38 $nb_species_keep | |
39 $length.min_length_nuc | |
40 >> '$log' | |
41 ]]></command> | |
42 | |
43 <inputs> | |
44 <param name="inputs" type="data" format="fasta" multiple="true" label="Input files" help="Only a fasta file with nucleic align sequences" /> | |
45 <!-- <param name="code_file" type="data" format="txt" label="Choose your file containing the universal code (codons and their amino acids)" /> --> | |
46 <!-- nb_species_keep is passed to all three scripts (S01, S02, S03) in the command. --> | |
47 <param name="nb_species_keep" type="integer" value="10" min="2" label="Minimal number of species in each locus" help="If you want to remove all the indels the maximum number of species is required" /> | |
48 <!-- Boolean rendered as "oui" / "non" on the command line; it is an argument of the S02 script. --> | |
49 <param name="methionine" type="boolean" checked="true" truevalue="oui" falsevalue="non" label="Do you want to consider the Methionine in the search of CDS? " /> | |
50 <!-- Length thresholds: min_length_seq goes to S01 and S02, min_length_subseq to S02, min_length_nuc to S03. --> | |
51 <section name="length" title="Do you want to choose the minimum length of the CDS?"> | |
52 <param name="min_length_seq" type="integer" value="50" min="0" label="Minimal length of the CDS, in proteic" help="By default it's 50" /> | |
53 <param name="min_length_subseq" type="integer" value="15" min="0" label="Minimal length of the subsequence, in proteic between two series of indels" help="By default it's 15" /> | |
54 <param name="min_length_nuc" type="integer" value="50" min="0" label="Minimal length of the CDS, in nucleic without the indel" help="By default it's 50" /> | |
55 </section> | |
56 <!-- The three selects below are not passed to the scripts; they are only read by the output filters. --> | |
57 <param name="out_BESTORF" type="select" label="Do you want the outputs (dataset collection list) containing files with the BEST ORF? "> | |
58 <option value="no">No</option> | |
59 <option value="aa">Yes, with the proteic format</option> | |
60 <option value="nuc">Yes, with the nucleic format</option> | |
61 <option value="both">Yes, with the proteic and nucleic format</option> | |
62 </param> | |
63 | |
64 <param name="out_CDS" type="select" label="Do you want the outputs (dataset collection list) containing files with CDS? "> | |
65 <option value="no">No</option> | |
66 <option value="aa">Yes, with the proteic format</option> | |
67 <option value="nuc">Yes, with the nucleic format</option> | |
68 <option value="both">Yes, with the proteic and nucleic format</option> | |
69 </param> | |
70 | |
71 <param name="out_CDS_filter" type="select" label="Do you want the outputs (dataset collection list) containing files with CDS without indel? "> | |
72 <option value="no">No</option> | |
73 <option value="aa">Yes, with the proteic format</option> | |
74 <option value="nuc">Yes, with the nucleic format</option> | |
75 <option value="both">Yes, with the proteic and nucleic format</option> | |
76 </param> | |
77 </inputs> | |
78 | |
79 <outputs> | |
80 <data format="txt" name="log" label="ORF_Search" /> | |
81 <collection name="output_BESTORF_aa" type="list" label="ORF_Search_Best_ORF_aa"> | |
82 <filter>out_BESTORF in ["aa","both"]</filter> | |
83 <discover_datasets pattern="__name_and_ext__" directory="04_BEST_ORF_aa" /> | |
84 </collection> | |
85 | |
86 <collection name="output_BESTORF_nuc" type="list" label="ORF_Search_Best_ORF_nuc"> | |
87 <filter>out_BESTORF in ["nuc","both"]</filter> | |
88 <discover_datasets pattern="__name_and_ext__" directory="04_BEST_ORF_nuc" /> | |
89 </collection> | |
90 <!-- CDS collections: the 05_CDS_* directories are exposed when methionine is unchecked. --> | |
91 <collection name="output_CDS_aa" type="list" label="ORF_Search_CDS_aa"> | |
92 <filter>out_CDS in ["aa","both"] and not methionine</filter> | |
93 <discover_datasets pattern="__name_and_ext__" directory="05_CDS_aa" /> | |
94 </collection> | |
95 | |
96 <collection name="output_CDS_nuc" type="list" label="ORF_Search_CDS_nuc"> | |
97 <filter>out_CDS in ["nuc","both"] and not methionine</filter> | |
98 <discover_datasets pattern="__name_and_ext__" directory="05_CDS_nuc" /> | |
99 </collection> | |
100 <!-- The 06_CDS_with_M_* directories are exposed instead when methionine is checked. --> | |
101 <collection name="output_CDS_M_aa" type="list" label="ORF_Search_CDS_with_M_aa"> | |
102 <filter>out_CDS in ["aa","both"] and methionine</filter> | |
103 <discover_datasets pattern="__name_and_ext__" directory="06_CDS_with_M_aa" /> | |
104 </collection> | |
105 | |
106 <collection name="output_CDS_M_nuc" type="list" label="ORF_Search_CDS_with_M_nuc"> | |
107 <filter>out_CDS in ["nuc","both"] and methionine</filter> | |
108 <discover_datasets pattern="__name_and_ext__" directory="06_CDS_with_M_nuc" /> | |
109 </collection> | |
110 <!-- Collections labelled "without indel", discovered from the 08_*_MINIMUM_MISSING_SEQUENCES directories. --> | |
111 <collection name="output_filter_aa" type="list" label="ORF_Search_CDS_without_indel_aa"> | |
112 <filter>out_CDS_filter in ["aa","both"]</filter> | |
113 <discover_datasets pattern="__name_and_ext__" directory="08_CDS_aa_MINIMUM_MISSING_SEQUENCES" /> | |
114 </collection> | |
115 | |
116 <collection name="output_filter_nuc" type="list" label="ORF_Search_CDS_without_indel_nuc"> | |
117 <filter>out_CDS_filter in ["nuc","both"]</filter> | |
118 <discover_datasets pattern="__name_and_ext__" directory="08_CDS_nuc_MINIMUM_MISSING_SEQUENCES" /> | |
119 </collection> | |
120 </outputs> | |
121 | |
122 <tests> | |
123 <!-- Test 1: methionine unchecked ("non"), nb_species_keep=3, all three output selectors set to "both". --> | |
124 <test> | |
125 <param name="inputs" ftype="fasta" value="inputs/orthogroup_1_with_4_sequences.fasta,inputs/orthogroup_6_with_4_sequences.fasta,inputs/orthogroup_7_with_3_sequences.fasta,inputs/orthogroup_8_with_4_sequences.fasta,inputs/orthogroup_12_with_5_sequences.fasta,inputs/orthogroup_14_with_4_sequences.fasta" /> | |
126 <param name="nb_species_keep" value="3" /> | |
127 <param name="methionine" value="non" /> | |
128 <section name="length"> | |
129 <param name="min_length_seq" value="50" /> | |
130 <param name="min_length_subseq" value="15" /> | |
131 <param name="min_length_nuc" value="50" /> | |
132 </section> | |
133 <param name="out_BESTORF" value="both" /> | |
134 <param name="out_CDS" value="both" /> | |
135 <param name="out_CDS_filter" value="both" /> | |
136 <output_collection name="output_BESTORF_aa" type="list" count="2"> | |
137 <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test1/orthogroup_1_with_3_species.fasta" /> | |
138 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test1/orthogroup_7_with_3_species.fasta" /> | |
139 </output_collection> | |
140 <output_collection name="output_BESTORF_nuc" type="list" count="2"> | |
141 <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test1/orthogroup_1_with_3_species.fasta" /> | |
142 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test1/orthogroup_7_with_3_species.fasta" /> | |
143 </output_collection> | |
144 <output_collection name="output_CDS_aa" type="list" count="2"> | |
145 <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_05_CDS_aa/test1/orthogroup_1_with_3_species.fasta" /> | |
146 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_05_CDS_aa/test1/orthogroup_7_with_3_species.fasta" /> | |
147 </output_collection> | |
148 <output_collection name="output_CDS_nuc" type="list" count="2"> | |
149 <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_05_CDS_nuc/test1/orthogroup_1_with_3_species.fasta" /> | |
150 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_05_CDS_nuc/test1/orthogroup_7_with_3_species.fasta" /> | |
151 </output_collection> | |
152 <output_collection name="output_filter_aa" type="list" count="1"> | |
153 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_08_CDS_without_indel_aa/test1/orthogroup_7_with_3_species.fasta" /> | |
154 </output_collection> | |
155 <output_collection name="output_filter_nuc" type="list" count="1"> | |
156 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_08_CDS_without_indel_nuc/test1/orthogroup_7_with_3_species.fasta" /> | |
157 </output_collection> | |
158 </test> | |
159 <!-- Test 2: methionine checked ("oui"), nb_species_keep=2. NOTE(review): out_CDS is "both" but no output_CDS_M_aa / output_CDS_M_nuc collection is asserted here; confirm whether that is intentional. --> | |
160 <test> | |
161 <param name="inputs" ftype="fasta" value="inputs/orthogroup_1_with_4_sequences.fasta,inputs/orthogroup_6_with_4_sequences.fasta,inputs/orthogroup_7_with_3_sequences.fasta,inputs/orthogroup_8_with_4_sequences.fasta,inputs/orthogroup_12_with_5_sequences.fasta,inputs/orthogroup_14_with_4_sequences.fasta" /> | |
162 <param name="nb_species_keep" value="2" /> | |
163 <param name="methionine" value="oui" /> | |
164 <section name="length"> | |
165 <param name="min_length_seq" value="50" /> | |
166 <param name="min_length_subseq" value="15" /> | |
167 <param name="min_length_nuc" value="50" /> | |
168 </section> | |
169 <param name="out_BESTORF" value="both" /> | |
170 <param name="out_CDS" value="both" /> | |
171 <param name="out_CDS_filter" value="both" /> | |
172 <output_collection name="output_BESTORF_aa" type="list" count="4"> | |
173 <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_1_with_3_species.fasta" /> | |
174 <element name="orthogroup_6_with_2_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_6_with_2_species.fasta" /> | |
175 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_7_with_3_species.fasta" /> | |
176 <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_14_with_2_species.fasta" /> | |
177 </output_collection> | |
178 <output_collection name="output_BESTORF_nuc" type="list" count="4"> | |
179 <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_1_with_3_species.fasta" /> | |
180 <element name="orthogroup_6_with_2_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_6_with_2_species.fasta" /> | |
181 <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_7_with_3_species.fasta" /> | |
182 <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_14_with_2_species.fasta" /> | |
183 </output_collection> | |
184 <output_collection name="output_filter_aa" type="list" count="1"> | |
185 <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_08_CDS_without_indel_aa/test2/orthogroup_14_with_2_species.fasta" /> | |
186 </output_collection> | |
187 <output_collection name="output_filter_nuc" type="list" count="1"> | |
188 <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_08_CDS_without_indel_nuc/test2/orthogroup_14_with_2_species.fasta" /> | |
189 </output_collection> | |
190 </test> | |
191 | |
192 </tests> | |
193 <help> | |
194 | |
195 @HELP_AUTHORS@ | |
196 | |
197 <![CDATA[ | |
198 | |
199 **Description** | |
200 | |
201 This tool takes files containing aligned nucleic sequences and searches for the ORF and the CDS. | |
202 | |
203 -------- | |
204 | |
205 **Inputs** | |
206 | |
207 Input files : (multiple) fasta files with nucleic aligned sequences. | |
208 | |
209 -------- | |
210 | |
211 **Parameters** | |
212 | |
213 - methionine : choose to consider the methionine in the search of CDS. | |
214 yes/no. | |
215 | |
216 - 'Minimal number of species in each locus' | |
217 Default : 10 (integer). | |
218 | |
219 - 'min_length_seq' : | |
220 minimal length of the sequence (in amino acids). | |
221 when the removal of the indel is done, the minimal length equals : previous length - 20. | |
222 for example if you choose 50 for the minimal length, the actual length equals 30. | |
223 Default : 50 (integer). | |
224 | |
225 - 'min_length_subseq' : | |
226 minimal length of the subsequence (in amino acids). | |
227 subsequence means the part of the original sequence between 2 sets of indels. | |
228 an indel set is composed of more than 2 indels; otherwise the set is considered as an unknown amino acid. | |
229 Default : 15 (integer). | |
230 | |
231 - 'min_length_nuc' : | |
232 Minimal length of the sequence in the nucleic format, without indels. | |
233 Default : 50 (integer). | |
234 | |
235 - other parameters let you choose which outputs you want : | |
236 - outputs with best ORFs. | |
237 - outputs with CDS, with or without indels. | |
238 - in proteic or nucleic format. | |
239 | |
240 -------- | |
241 | |
242 **Outputs** | |
243 | |
244 - ORF_Search | |
245 the log file (mainly statistics about the tool). | |
246 | |
247 - ORF_Search_Best_ORF_aa | |
248 the output with the best ORF in the proteic format. | |
249 | |
250 - ORF_Search_Best_ORF_nuc | |
251 the output with the best ORF in the nucleic format. | |
252 | |
253 - ORF_Search_CDS_aa | |
254 the output with the CDS (regardless of the Methionine) in the proteic format. | |
255 | |
256 - ORF_Search_CDS_nuc | |
257 the output with the CDS (regardless of the Methionine) in the nucleic format. | |
258 | |
259 - ORF_Search_CDS_with_M_aa | |
260 the output with the CDS (considering the Methionine) in proteic format. | |
261 the rule : they must have a methionine before the minimal length of the sequence. | |
262 for example before the 30 last amino acid. | |
263 | |
264 - ORF_Search_CDS_with_M_nuc | |
265 the output with the CDS (considering the Methionine) in nucleic format. | |
266 the rule : they must have a methionine before the minimal length of the sequence. | |
267 for example before the 30 last amino acid. | |
268 | |
269 - ORF_Search_CDS_without_indel_aa | |
270 is the output with the CDS without indel in proteic format. | |
271 considering the Methionine or not : according to the option chosen. | |
272 | |
273 - ORF_Search_CDS_without_indel_nuc | |
274 is the output with the CDS without indel in nucleic format. | |
275 considering the Methionine or not : according to the option chosen. | |
276 | |
277 --------- | |
278 | |
279 **The AdaptSearch Pipeline** | |
280 | |
281 .. image:: adaptsearch_picture_helps.png | |
282 | |
283 --------- | |
284 | |
285 Changelog | |
286 --------- | |
287 | |
288 **Version 2.0 - 05/07/2017** | |
289 | |
290 - NEW: Replace the zip between tools by Dataset Collection | |
291 | |
292 **Version 1.0 - 13/04/2017** | |
293 | |
294 - Added functional test with planemo | |
295 - planemo test with conda dependency for python | |
296 - Scripts renamed + symlinks to the directory 'scripts' | |
297 | |
298 ]]> | |
299 | |
300 </help> | |
301 | |
302 <citations> | |
303 | |
304 </citations> | |
305 | |
306 </tool> |