comparison b2btools_single_sequence.xml @ 0:b694a77ca1e8 draft default tip

planemo upload commit 599e1135baba020195b3f7576449d595bca9af75
author iuc
date Tue, 09 Aug 2022 12:30:52 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b694a77ca1e8
1 <tool
2 id="b2btools_single_sequence"
3 name="b2bTools: Biophysical predictors for single sequences"
4 version="3.0.5+galaxy0"
5 license="GPL-3.0"
6 python_template_version="3.5"
7 profile="21.05">
8 <description>from their amino-acid sequences</description>
9 <xrefs>
10 <xref type="bio.tools">b2btools</xref>
11 </xrefs>
12 <requirements>
13 <requirement type="package" version="3.0.5">b2btools</requirement> <!-- 3.0.5 is the same upstream release that prefixes the tool version attribute (3.0.5+galaxy0) -->
14 </requirements>
15 <command detect_errors="exit_code"><![CDATA[
16 mkdir -p ./tabular ./plots &&
17 python '$__tool_directory__/script.py' --file '$input' --output ./tabular --json '$predictions_output'
18 $section_predictors.dynamine
19 $section_predictors.disomine
20 $section_predictors.efoldmine
21 $section_predictors.agmata
22 #if $section_plot.plot == '--plot' or $section_plot.plot_all == '--plot_all':
23 --plot-output ./plots
24 #end if
25 $section_plot.plot
26 $section_plot.plot_all
27 $section_plot.highlight
28 ]]></command> <!-- Both working directories are always created; the plot-output option is added only when plot or plot_all is enabled. Every boolean parameter expands to its truevalue flag, or to an empty string when unchecked -->
29 <inputs>
30 <param type="data" name="input" format="fasta" label="Protein sequences in FASTA format" help="FASTA file of protein sequences"/>
31 <section name="section_predictors" title="Biophysical predictors" help="Configure this section to select the predictions to be executed"> <!-- Each checkbox contributes its own command-line flag when checked and an empty string otherwise; all four carry checked="true" -->
32 <param
33 argument="--dynamine"
34 type="boolean"
35 checked="true"
36 truevalue="--dynamine"
37 falsevalue=""
38 label="DynaMine: Prediction of protein backbone dynamics from sequence only"
39 help="Fast predictor of protein backbone dynamics using only sequence information as input. The version here also predicts side-chain dynamics and secondary structure predictors using the same principle." />
40 <param
41 argument="--disomine"
42 type="boolean"
43 checked="true"
44 truevalue="--disomine"
45 falsevalue=""
46 label="DisoMine: Prediction of protein disorder from sequence only"
47 help="Predicts protein disorder with recurrent neural networks not directly from the amino acid sequence, but instead from more generic predictions of key biophysical properties, here protein dynamics, secondary structure and early folding."/>
48 <param
49 argument="--efoldmine"
50 type="boolean"
51 checked="true"
52 truevalue="--efoldmine"
53 falsevalue=""
54 label="EFoldMine: Prediction of protein early folding regions from sequence only"
55 help="Predicts from the primary amino acid sequence of a protein, which amino acids are likely involved in early folding events."/>
56 <param
57 argument="--agmata"
58 type="boolean"
59 checked="true"
60 truevalue="--agmata"
61 falsevalue=""
62 label="Agmata: Prediction of protein regions that are likely to cause beta-aggregation"
63 help="Agmata is a single-sequence based predictor of protein regions that are likely to cause beta-aggregation. It is based on a model that uses the biophysical predictions of protein behaviour, not on amino acid codes directly."/>
64 </section>
65 <section name="section_plot" title="Plot options" help="Configure plot output"> <!-- The plot switches below declare no checked attribute; each emits its flag when ticked and an empty string otherwise -->
66 <param
67 name="plot"
68 argument="--plot"
69 type="boolean"
70 label="Plot predicted values by sequence"
71 truevalue="--plot"
72 falsevalue=""
73 help="This option plots predicted values in different files"/>
74 <param
75 name="plot_all"
76 argument="--plot_all"
77 type="boolean"
78 label="Plot all sequences together"
79 truevalue="--plot_all"
80 falsevalue=""
81 help="This option plots all sequences together in order to compare predicted values of different sequences"/>
82 <param
83 name="highlight"
84 argument="--highlight"
85 type="boolean"
86 label="Highlight regions of interest"
87 truevalue="--highlight"
88 falsevalue=""
89 help="Highlight biophysical regions on the background of the plots"/>
90 </section>
91 </inputs>
92 <outputs>
93 <data name="predictions_output" label="Predictions in JSON format" format="json" /> <!-- path handed to the script through its json option in the command -->
94 <collection name="split_output" type="list" label="Tabular predictions by sequence"> <!-- collected from ./tabular, the directory the command passes as its output option -->
95 <discover_datasets pattern="__name_and_ext__" format="tabular" directory="tabular" visible="true" />
96 </collection>
97 <collection name="split_output_plots" type="list" label="Plots"> <!-- collected from ./plots, which the command only hands to the script when plot or plot_all is enabled -->
98 <discover_datasets pattern="__name_and_ext__" format="png" directory="plots" visible="true" />
99 </collection>
100 </outputs>
101 <tests>
102 <!-- Test 1: All four predictors are selected; per-sequence (plot) and aggregated (plot_all) charts are both requested, with highlighting -->
103 <test expect_exit_code="0" expect_num_outputs="3">
104 <param name="input" value="input.fasta" ftype="fasta"/>
105 <section name="section_predictors">
106 <param name="dynamine" value="true"/>
107 <param name="disomine" value="true"/>
108 <param name="efoldmine" value="true"/>
109 <param name="agmata" value="true"/>
110 </section>
111 <section name="section_plot">
112 <param name="plot" value="true"/>
113 <param name="plot_all" value="true"/>
114 <param name="highlight" value="true"/>
115 </section>
116 <assert_command>
117 <has_text text="--dynamine" />
118 <has_text text="--disomine" />
119 <has_text text="--agmata" />
120 <has_text text="--efoldmine" />
121 <has_text text="--json" />
122 <has_text text="--plot " /> <!-- the trailing space tells the plot flag apart from plot_all and plot-output -->
123 <has_text text="--plot_all" />
124 <has_text text="--highlight" />
125 <has_text text="--output" />
126 <has_text text="--plot-output" />
127 </assert_command>
128 <output name="predictions_output" value="test_output.json" ftype="json"/>
129 <!-- 11 sequences = 11 TSV files -->
130 <output_collection name="split_output" type="list" count="11">
131 <element name="random_sequence_01_consisting_of_40_residues" file="random_sequence_01_consisting_of_40_residues.tsv" ftype="tsv" compare="diff"/>
132 <element name="random_sequence_02_consisting_of_40_residues" file="random_sequence_02_consisting_of_40_residues.tsv" ftype="tsv" compare="diff"/>
133 <element name="random_sequence_03_consisting_of_30_residues" file="random_sequence_03_consisting_of_30_residues.tsv" ftype="tsv" compare="diff"/>
134 <element name="random_sequence_04_consisting_of_40_residues" file="random_sequence_04_consisting_of_40_residues.tsv" ftype="tsv" compare="diff"/>
135 <element name="random_sequence_05_consisting_of_30_residues" file="random_sequence_05_consisting_of_30_residues.tsv" ftype="tsv" compare="diff"/>
136 <element name="random_sequence_06_consisting_of_45_residues" file="random_sequence_06_consisting_of_45_residues.tsv" ftype="tsv" compare="diff"/>
137 <element name="random_sequence_07_consisting_of_30_residues" file="random_sequence_07_consisting_of_30_residues.tsv" ftype="tsv" compare="diff"/>
138 <element name="random_sequence_08_consisting_of_40_residues" file="random_sequence_08_consisting_of_40_residues.tsv" ftype="tsv" compare="diff"/>
139 <element name="random_sequence_09_consisting_of_30_residues" file="random_sequence_09_consisting_of_30_residues.tsv" ftype="tsv" compare="diff"/>
140 <element name="random_sequence_10_consisting_of_65_residues" file="random_sequence_10_consisting_of_65_residues.tsv" ftype="tsv" compare="diff"/>
141 <element name="random_sequence_11_consisting_of_30_residues" file="random_sequence_11_consisting_of_30_residues.tsv" ftype="tsv" compare="diff"/>
142 </output_collection>
143 <!-- 9 predicted values x 11 sequences + 9 predicted values aggregated = (99 + 9) charts = 108 charts -->
144 <output_collection name="split_output_plots" type="list" count="108" />
145 </test>
146 <!-- Test 2: Only DynaMine is selected; per-sequence (plot) and aggregated (plot_all) charts are both requested, with highlighting -->
147 <test expect_exit_code="0" expect_num_outputs="3">
148 <param name="input" value="input.fasta" ftype="fasta"/>
149 <section name="section_predictors">
150 <param name="dynamine" value="true"/>
151 <param name="disomine" value="false"/>
152 <param name="efoldmine" value="false"/>
153 <param name="agmata" value="false"/>
154 </section>
155 <section name="section_plot">
156 <param name="plot" value="true"/>
157 <param name="plot_all" value="true"/>
158 <param name="highlight" value="true"/>
159 </section>
160 <assert_command>
161 <has_text text="--dynamine" />
162 <not_has_text text="--disomine" />
163 <not_has_text text="--agmata" />
164 <not_has_text text="--efoldmine" />
165 <has_text text="--json" />
166 <has_text text="--plot " />
167 <has_text text="--plot_all" />
168 <has_text text="--highlight" />
169 <has_text text="--output" />
170 <has_text text="--plot-output" />
171 </assert_command>
172 <output name="predictions_output" value="test_output_dynamine.json" ftype="json"/>
173 <!-- 11 sequences = 11 TSV files -->
174 <output_collection name="split_output" type="list" count="11">
175 <element name="random_sequence_01_consisting_of_40_residues" file="random_sequence_01_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
176 <element name="random_sequence_02_consisting_of_40_residues" file="random_sequence_02_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
177 <element name="random_sequence_03_consisting_of_30_residues" file="random_sequence_03_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
178 <element name="random_sequence_04_consisting_of_40_residues" file="random_sequence_04_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
179 <element name="random_sequence_05_consisting_of_30_residues" file="random_sequence_05_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
180 <element name="random_sequence_06_consisting_of_45_residues" file="random_sequence_06_consisting_of_45_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
181 <element name="random_sequence_07_consisting_of_30_residues" file="random_sequence_07_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
182 <element name="random_sequence_08_consisting_of_40_residues" file="random_sequence_08_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
183 <element name="random_sequence_09_consisting_of_30_residues" file="random_sequence_09_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
184 <element name="random_sequence_10_consisting_of_65_residues" file="random_sequence_10_consisting_of_65_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
185 <element name="random_sequence_11_consisting_of_30_residues" file="random_sequence_11_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
186 </output_collection>
187 <!-- 6 predicted values x 11 sequences + 6 predicted values aggregated = (66 + 6) charts = 72 charts -->
188 <output_collection name="split_output_plots" type="list" count="72" />
189 </test>
190 <!-- Test 3: Only DynaMine is selected; only per-sequence charts (plot) are requested, with highlighting, so the plot_all flag must be absent -->
191 <test expect_exit_code="0" expect_num_outputs="3">
192 <param name="input" value="input.fasta" ftype="fasta"/>
193 <section name="section_predictors">
194 <param name="dynamine" value="true"/>
195 <param name="disomine" value="false"/>
196 <param name="efoldmine" value="false"/>
197 <param name="agmata" value="false"/>
198 </section>
199 <section name="section_plot">
200 <param name="plot" value="true"/>
201 <param name="plot_all" value="false"/>
202 <param name="highlight" value="true"/>
203 </section>
204 <assert_command>
205 <has_text text="--dynamine" />
206 <not_has_text text="--disomine" />
207 <not_has_text text="--agmata" />
208 <not_has_text text="--efoldmine" />
209 <has_text text="--json" />
210 <has_text text="--plot " />
211 <not_has_text text="--plot_all" />
212 <has_text text="--highlight" />
213 <has_text text="--output" />
214 <has_text text="--plot-output" />
215 </assert_command>
216 <output name="predictions_output" value="test_output_dynamine.json" ftype="json"/>
217 <!-- 11 sequences = 11 TSV files -->
218 <output_collection name="split_output" type="list" count="11">
219 <element name="random_sequence_01_consisting_of_40_residues" file="random_sequence_01_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
220 <element name="random_sequence_02_consisting_of_40_residues" file="random_sequence_02_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
221 <element name="random_sequence_03_consisting_of_30_residues" file="random_sequence_03_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
222 <element name="random_sequence_04_consisting_of_40_residues" file="random_sequence_04_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
223 <element name="random_sequence_05_consisting_of_30_residues" file="random_sequence_05_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
224 <element name="random_sequence_06_consisting_of_45_residues" file="random_sequence_06_consisting_of_45_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
225 <element name="random_sequence_07_consisting_of_30_residues" file="random_sequence_07_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
226 <element name="random_sequence_08_consisting_of_40_residues" file="random_sequence_08_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
227 <element name="random_sequence_09_consisting_of_30_residues" file="random_sequence_09_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
228 <element name="random_sequence_10_consisting_of_65_residues" file="random_sequence_10_consisting_of_65_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
229 <element name="random_sequence_11_consisting_of_30_residues" file="random_sequence_11_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
230 </output_collection>
231 <!-- 6 predicted values x 11 sequences = 66 charts -->
232 <output_collection name="split_output_plots" type="list" count="66" />
233 </test>
234 <!-- Test 4: Only DynaMine is selected; only aggregated charts (plot_all) are requested, with highlighting, so the per-sequence plot flag must be absent -->
235 <test expect_exit_code="0" expect_num_outputs="3">
236 <param name="input" value="input.fasta" ftype="fasta"/>
237 <section name="section_predictors">
238 <param name="dynamine" value="true"/>
239 <param name="disomine" value="false"/>
240 <param name="efoldmine" value="false"/>
241 <param name="agmata" value="false"/>
242 </section>
243 <section name="section_plot">
244 <param name="plot" value="false"/>
245 <param name="plot_all" value="true"/>
246 <param name="highlight" value="true"/>
247 </section>
248 <assert_command>
249 <has_text text="--dynamine" />
250 <not_has_text text="--disomine" />
251 <not_has_text text="--agmata" />
252 <not_has_text text="--efoldmine" />
253 <has_text text="--json" />
254 <has_text text="--plot_all" />
255 <has_text text="--highlight" />
256 <not_has_text text="--plot " />
257 <has_text text="--output" />
258 <has_text text="--plot-output" />
259 </assert_command>
260 <output name="predictions_output" value="test_output_dynamine.json" ftype="json"/>
261 <!-- 11 sequences = 11 TSV files -->
262 <output_collection name="split_output" type="list" count="11">
263 <element name="random_sequence_01_consisting_of_40_residues" file="random_sequence_01_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
264 <element name="random_sequence_02_consisting_of_40_residues" file="random_sequence_02_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
265 <element name="random_sequence_03_consisting_of_30_residues" file="random_sequence_03_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
266 <element name="random_sequence_04_consisting_of_40_residues" file="random_sequence_04_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
267 <element name="random_sequence_05_consisting_of_30_residues" file="random_sequence_05_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
268 <element name="random_sequence_06_consisting_of_45_residues" file="random_sequence_06_consisting_of_45_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
269 <element name="random_sequence_07_consisting_of_30_residues" file="random_sequence_07_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
270 <element name="random_sequence_08_consisting_of_40_residues" file="random_sequence_08_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
271 <element name="random_sequence_09_consisting_of_30_residues" file="random_sequence_09_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
272 <element name="random_sequence_10_consisting_of_65_residues" file="random_sequence_10_consisting_of_65_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
273 <element name="random_sequence_11_consisting_of_30_residues" file="random_sequence_11_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
274 </output_collection>
275 <!-- 6 predicted values aggregated = 6 charts -->
276 <output_collection name="split_output_plots" type="list" count="6" />
277 </test>
278 <!-- Test 5: Only DynaMine is selected; only aggregated charts (plot_all) are requested and the highlight parameter is left unset, so no highlight flag is expected -->
279 <test expect_exit_code="0" expect_num_outputs="3">
280 <param name="input" value="input.fasta" ftype="fasta"/>
281 <section name="section_predictors">
282 <param name="dynamine" value="true"/>
283 <param name="disomine" value="false"/>
284 <param name="efoldmine" value="false"/>
285 <param name="agmata" value="false"/>
286 </section>
287 <section name="section_plot">
288 <param name="plot" value="false"/>
289 <param name="plot_all" value="true"/>
290 </section>
291 <assert_command>
292 <has_text text="--dynamine" />
293 <has_text text="--json" />
294 <has_text text="--plot_all" />
295 <has_text text="--output" />
296 <has_text text="--plot-output" />
297 <not_has_text text="--disomine" />
298 <not_has_text text="--agmata" />
299 <not_has_text text="--efoldmine" />
300 <not_has_text text="--highlight" />
301 <not_has_text text="--plot " />
302 </assert_command>
303 <output name="predictions_output" value="test_output_dynamine.json" ftype="json"/>
304 <!-- 11 sequences = 11 TSV files -->
305 <output_collection name="split_output" type="list" count="11">
306 <element name="random_sequence_01_consisting_of_40_residues" file="random_sequence_01_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
307 <element name="random_sequence_02_consisting_of_40_residues" file="random_sequence_02_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
308 <element name="random_sequence_03_consisting_of_30_residues" file="random_sequence_03_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
309 <element name="random_sequence_04_consisting_of_40_residues" file="random_sequence_04_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
310 <element name="random_sequence_05_consisting_of_30_residues" file="random_sequence_05_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
311 <element name="random_sequence_06_consisting_of_45_residues" file="random_sequence_06_consisting_of_45_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
312 <element name="random_sequence_07_consisting_of_30_residues" file="random_sequence_07_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
313 <element name="random_sequence_08_consisting_of_40_residues" file="random_sequence_08_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
314 <element name="random_sequence_09_consisting_of_30_residues" file="random_sequence_09_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
315 <element name="random_sequence_10_consisting_of_65_residues" file="random_sequence_10_consisting_of_65_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
316 <element name="random_sequence_11_consisting_of_30_residues" file="random_sequence_11_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
317 </output_collection>
318 <!-- 6 predicted values aggregated = 6 charts -->
319 <output_collection name="split_output_plots" type="list" count="6" />
320 </test>
321 <!-- Test 6: Only DynaMine is selected and every plot option is switched off, so no plot-related flag may appear in the command -->
322 <test expect_exit_code="0" expect_num_outputs="3">
323 <param name="input" value="input.fasta" ftype="fasta"/>
324 <section name="section_predictors">
325 <param name="dynamine" value="true"/>
326 <param name="disomine" value="false"/>
327 <param name="efoldmine" value="false"/>
328 <param name="agmata" value="false"/>
329 </section>
330 <section name="section_plot">
331 <param name="plot" value="false"/>
332 <param name="plot_all" value="false"/>
333 <param name="highlight" value="false"/>
334 </section>
335 <assert_command>
336 <has_text text="--dynamine" />
337 <has_text text="--json" />
338 <has_text text="--output" />
339 <not_has_text text="--disomine" />
340 <not_has_text text="--agmata" />
341 <not_has_text text="--efoldmine" />
342 <not_has_text text="--plot " />
343 <not_has_text text="--plot_all" />
344 <not_has_text text="--highlight" />
345 <not_has_text text="--plot-output" />
346 </assert_command>
347 <output name="predictions_output" value="test_output_dynamine.json" ftype="json"/>
348 <!-- 11 sequences = 11 TSV files -->
349 <output_collection name="split_output" type="list" count="11">
350 <element name="random_sequence_01_consisting_of_40_residues" file="random_sequence_01_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
351 <element name="random_sequence_02_consisting_of_40_residues" file="random_sequence_02_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
352 <element name="random_sequence_03_consisting_of_30_residues" file="random_sequence_03_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
353 <element name="random_sequence_04_consisting_of_40_residues" file="random_sequence_04_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
354 <element name="random_sequence_05_consisting_of_30_residues" file="random_sequence_05_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
355 <element name="random_sequence_06_consisting_of_45_residues" file="random_sequence_06_consisting_of_45_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
356 <element name="random_sequence_07_consisting_of_30_residues" file="random_sequence_07_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
357 <element name="random_sequence_08_consisting_of_40_residues" file="random_sequence_08_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
358 <element name="random_sequence_09_consisting_of_30_residues" file="random_sequence_09_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
359 <element name="random_sequence_10_consisting_of_65_residues" file="random_sequence_10_consisting_of_65_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
360 <element name="random_sequence_11_consisting_of_30_residues" file="random_sequence_11_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/>
361 </output_collection>
362 <output_collection name="split_output_plots" type="list" count="0" /> <!-- plotting is disabled, so the plots collection must stay empty -->
363 </test>
364 <!-- Test 7: No predictor is selected and no plot option is set; the job must fail with exit code 2 -->
365 <test expect_failure="true" expect_exit_code="2">
366 <param name="input" value="input.fasta" ftype="fasta"/>
367 <section name="section_predictors">
368 <param name="dynamine" value="false"/>
369 <param name="disomine" value="false"/>
370 <param name="efoldmine" value="false"/>
371 <param name="agmata" value="false"/>
372 </section>
373 <section name="section_plot">
374 <param name="plot" value="false"/>
375 <param name="plot_all" value="false"/>
376 <param name="highlight" value="false"/>
377 </section>
378 <assert_command>
379 <not_has_text text="--dynamine" />
380 <not_has_text text="--disomine" />
381 <not_has_text text="--agmata" />
382 <not_has_text text="--efoldmine" />
383 <not_has_text text="--plot-output" />
384 <not_has_text text="--plot " />
385 <not_has_text text="--plot_all" />
386 <not_has_text text="--highlight" />
387 </assert_command>
388 </test>
389 <!-- Test 8: The input parameter is omitted entirely (DynaMine only, no plots); the job must fail with exit code 1 -->
390 <test expect_failure="true" expect_exit_code="1">
391 <section name="section_predictors">
392 <param name="dynamine" value="true"/>
393 <param name="disomine" value="false"/>
394 <param name="efoldmine" value="false"/>
395 <param name="agmata" value="false"/>
396 </section>
397 <section name="section_plot">
398 <param name="plot" value="false"/>
399 <param name="plot_all" value="false"/>
400 <param name="highlight" value="false"/>
401 </section>
402 <assert_command>
403 <has_text text="--dynamine" />
404 <not_has_text text="--disomine" />
405 <not_has_text text="--agmata" />
406 <not_has_text text="--efoldmine" />
407 <not_has_text text="--plot-output" />
408 <not_has_text text="--plot " />
409 <not_has_text text="--plot_all" />
410 <not_has_text text="--highlight" />
411 </assert_command>
412 </test>
413 <!-- Test 9: The input is declared as FASTA (wrong.fasta) but presumably does not contain valid FASTA content; the job must fail with exit code 1 -->
414 <test expect_failure="true" expect_exit_code="1">
415 <param name="input" value="wrong.fasta" ftype="fasta"/>
416 <section name="section_predictors">
417 <param name="dynamine" value="true"/>
418 <param name="disomine" value="false"/>
419 <param name="efoldmine" value="false"/>
420 <param name="agmata" value="false"/>
421 </section>
422 <section name="section_plot">
423 <param name="plot" value="false"/>
424 <param name="plot_all" value="false"/>
425 <param name="highlight" value="false"/>
426 </section>
427 <assert_command>
428 <has_text text="--dynamine" />
429 <not_has_text text="--disomine" />
430 <not_has_text text="--agmata" />
431 <not_has_text text="--efoldmine" />
432 <not_has_text text="--plot-output" />
433 <not_has_text text="--plot " />
434 <not_has_text text="--plot_all" />
435 <not_has_text text="--highlight" />
436 </assert_command>
437 </test>
438 <!-- Test 10: A file uploaded as tabular (wrong.tsv) is supplied where FASTA is required; the job must fail with exit code 1 -->
439 <test expect_failure="true" expect_exit_code="1">
440 <param name="input" value="wrong.tsv" ftype="tabular"/>
441 <section name="section_predictors">
442 <param name="dynamine" value="true"/>
443 <param name="disomine" value="false"/>
444 <param name="efoldmine" value="false"/>
445 <param name="agmata" value="false"/>
446 </section>
447 <section name="section_plot">
448 <param name="plot" value="false"/>
449 <param name="plot_all" value="false"/>
450 <param name="highlight" value="false"/>
451 </section>
452 <assert_command>
453 <has_text text="--dynamine" />
454 <not_has_text text="--disomine" />
455 <not_has_text text="--agmata" />
456 <not_has_text text="--efoldmine" />
457 <not_has_text text="--plot-output" />
458 <not_has_text text="--plot " />
459 <not_has_text text="--plot_all" />
460 <not_has_text text="--highlight" />
461 </assert_command>
462 </test>
463 </tests>
464 <help><![CDATA[
465 Single protein sequence analysis
466 --------------------------------
467 The **current Galaxy Tool** is the implementation of the Bio2Byte tools for **single protein sequence analysis**.
468 It means that this tool receives amino-acid sequences in FASTA format and
469 will return the biophysical predictions depending on the predictors selected.
470
471 Input sequences
472 '''''''''''''''
473 The tool accepts one or more input files; each file
474 must be a valid FASTA file containing at least one amino-acid sequence.
475
476 There is no limit on the number of sequences per file; however,
477 every sequence must have at least 5 residues.
478
479 Available biophysical predictors
480 ''''''''''''''''''''''''''''''''
481 This is the list of the available Bio2Byte tools. Each prediction tool produces its own subset of
482 predicted features; for instance, DynaMine produces six features, which appear as columns in the tabular output and as keys in the JSON output:
483
484 - **DynaMine**: Fast predictor of protein backbone dynamics using only sequence information as input. The version here also predicts side-chain dynamics and secondary structure predictors using the same principle.
485 - *Backbone dynamics*: found as ``backbone`` inside either the tabular result, the JSON output file, or the plots in PNG format.
486 - *Side chain dynamics*: found as ``sidechain`` inside either the tabular result, the JSON output file, or the plots in PNG format.
487 - *Helix propensity*: found as ``helix`` inside either the tabular result, the JSON output file, or the plots in PNG format.
488 - *Sheet propensity*: found as ``sheet`` inside either the tabular result, the JSON output file, or the plots in PNG format.
489 - *Coil propensity*: found as ``coil`` inside either the tabular result, the JSON output file, or the plots in PNG format.
490 - *Polyproline-II propensity*: found as ``ppII`` inside either the tabular result, the JSON output file, or the plots in PNG format.
491 - **DisoMine**: Predicts protein disorder with recurrent neural networks not directly from the amino acid sequence, but instead from more generic predictions of key biophysical properties, here protein dynamics, secondary structure, and early folding.
492 - *Disorder*: found as ``disomine`` inside either the tabular result, the JSON output file, or the plots in PNG format.
493 - **EFoldMine**: Predicts from the primary amino acid sequence of a protein, which amino acids are likely involved in early folding events.
494 - *Early folding*: found as ``efoldmine`` inside either the tabular result, the JSON output file, or the plots in PNG format.
495 - **Agmata**: Beta-sheet aggregation with the following predicted features:
496 - *Beta-sheet aggregation*: found as ``agmata`` inside either the tabular result, the JSON output file, or the plots in PNG format.
497
498 Expected results
499 ''''''''''''''''
500 From a single FASTA file with N sequences, the process will produce:
501
502 - **single JSON output**: it will contain a key per sequence defined in the input FASTA file. Each key contains the predicted features for that sequence.
503 - **N tabular results**: Each tabular result is a table where the columns are the predicted features and the rows are each residue of the sequence
504 - **N predicted features plots** (in case the user checked the ``plot`` parameter): Each predicted feature is plotted for each sequence
505 - **predicted feature plots** (in case the user checked the ``plot_all`` parameter): For each predicted feature, there will be a plot containing a series per sequence, so all the sequences are plotted in one single png file for each predicted feature
506
507 Example
508 ~~~~~~~
509
510 Given this input in FASTA format with 11 sequences:
511 ::
512
513 >random_sequence_01_consisting_of_40_residues
514 MDRHDPVQKSMMMDRHDPVQKMDRHDPVQKSDRHDPVQKS
515
516 >random_sequence_02_consisting_of_40_residues
517 MWSMWRAMWSSQRAMWSMWRAMWSMSQRAMWSMWRAMWSM
518
519 >random_sequence_03_consisting_of_30_residues
520 YSWTHYELKAVWCELTYWRSWTHYELKAVV
521
522 >random_sequence_04_consisting_of_40_residues
523 SWTHYEYSWTHYELKAVWCELTYWRSWTHYELKAVVLKAV
524
525 >random_sequence_05_consisting_of_30_residues
526 NCPIEHHLCANKMDLHHHHLCAHHLPEDQY
527
528 >random_sequence_06_consisting_of_45_residues
529 YACLFQKPYIHHLCANKMDLHHNKMDLHHHHLCAHHLHHLCAHHL
530
531 >random_sequence_07_consisting_of_30_residues
532 FHHLCANKMDLHHHHLCAHHLVPGKQEPDS
533
534 >random_sequence_08_consisting_of_40_residues
535 HHLCANKMDLHHHHLCAHHLCANKMDLNKMDLLCANKMDL
536
537 >random_sequence_09_consisting_of_30_residues
538 GNKTPFMKMHGGNKTPFMKMHNKTPFMKMH
539
540 >random_sequence_10_consisting_of_65_residues
541 LDNSKMWQLDNPMPMSKMWQLDNSKMWQLDNLDNSKMWQLDNPMPMSKMWQLDNSKMWQLDNAAA
542
543 >random_sequence_11_consisting_of_30_residues
544 PMSKMWQLDNMSKMWQLDNPMSKMWQLDNA
545
546 The following results are produced when all the predictor tools (DynaMine, DisoMine, EFoldMine, and Agmata) are selected:
547
548 JSON result
549 ...........
550 Containing all the input sequences and their predicted features depending on the selected predictors.
551 ::
552
553 {
554 "random_sequence_03_consisting_of_30_residues": {
555 "agmata": [
556 0.012,
557 0.08,
558 0.119,
559 0.154,
560 0.2,
561 0.34,
562 1.023,
563 4.084,
564 12.328,
565 20.723,
566 25.866,
567 24.864,
568 17.509,
569 9.32,
570 3.591,
571 1.559,
572 0.64,
573 0.221,
574 0.084,
575 0.038,
576 0.031,
577 0.035,
578 0.052,
579 0.159,
580 1.115,
581 7.747,
582 8.329,
583 8.214,
584 7.259,
585 0.608
586 ],
587 "backbone": [
588 0.818,
589 0.833,
590 0.87,
591 ...
592 ],
593 ...
594 },
595 ...
596 }
597
598 Tabular results
599 ...............
600 There will be one tabular file (.TSV) per sequence, where each row is a residue of the sequence and the columns are the predicted features, which depend on the selected predictors.
601
602 ::
603
604 residue_index residue agmata backbone coil disoMine earlyFolding helix ppII sheet sidechain
605 0 Y 0.012 0.818 0.376 0.783 0.031 0.262 0.074 0.295 0.624
606 1 S 0.08 0.833 0.373 0.818 0.05 0.253 0.07 0.33 0.59
607 2 W 0.119 0.87 0.361 0.772 0.061 0.27 0.057 0.341 0.696
608 3 T 0.154 0.906 0.336 0.641 0.064 0.334 0.037 0.341 0.665
609 4 H 0.2 0.945 0.295 0.596 0.099 0.421 0.017 0.324 0.673
610 5 Y 0.34 0.969 0.261 0.583 0.15 0.47 0.015 0.339 0.644
611 6 E 1.023 0.967 0.251 0.534 0.187 0.494 0.024 0.354 0.438
612 7 L 4.084 0.959 0.247 0.441 0.291 0.481 0.025 0.382 0.677
613 8 K 12.328 0.972 0.237 0.355 0.294 0.464 0.024 0.429 0.449
614 9 A 20.723 0.974 0.239 0.273 0.274 0.442 0.026 0.436 0.685
615 10 V 25.866 0.984 0.233 0.231 0.336 0.415 0.028 0.45 0.684
616 11 W 24.864 1.016 0.222 0.193 0.408 0.412 0.023 0.471 0.722
617 12 C 17.51 1.014 0.229 0.174 0.426 0.396 0.016 0.479 0.747
618 13 E 9.32 0.997 0.241 0.155 0.261 0.434 0.016 0.456 0.403
619 14 L 3.591 0.99 0.255 0.142 0.259 0.443 0.012 0.422 0.704
620 15 T 1.559 0.977 0.277 0.138 0.278 0.45 0.019 0.379 0.72
621 16 Y 0.64 0.969 0.299 0.134 0.282 0.428 0.025 0.353 0.656
622 17 W 0.221 0.976 0.317 0.135 0.262 0.415 0.027 0.328 0.704
623 18 R 0.084 0.957 0.331 0.132 0.147 0.41 0.029 0.295 0.462
624 19 S 0.038 0.956 0.342 0.148 0.142 0.393 0.028 0.289 0.627
625 20 W 0.031 0.947 0.355 0.182 0.178 0.379 0.031 0.304 0.71
626 21 T 0.035 0.964 0.333 0.205 0.187 0.393 0.023 0.305 0.705
627 22 H 0.052 0.976 0.286 0.228 0.19 0.451 0.013 0.309 0.663
628 23 Y 0.159 0.967 0.268 0.229 0.185 0.503 0.016 0.296 0.635
629 24 E 1.115 0.949 0.26 0.226 0.187 0.528 0.022 0.319 0.385
630 25 L 7.747 0.929 0.243 0.215 0.261 0.519 0.023 0.358 0.649
631 26 K 8.329 0.924 0.225 0.253 0.247 0.487 0.023 0.405 0.399
632 27 A 8.214 0.906 0.22 0.313 0.25 0.452 0.031 0.451 0.606
633 28 V 7.259 0.89 0.218 0.405 0.076 0.443 0.042 0.455 0.607
634 29 V 0.608 0.871 0.242 0.605 0.029 0.418 0.051 0.426 0.596
635
636 Plots
637 .....
638
639 Depending on the options selected, there will be a plot (built using ``matplotlib``)
640 per prediction per sequence and/or a plot per prediction for all the sequences together.
641
642 - Checking the **Plot** parameter means that the predicted features are plotted in separate files for each input sequence.
643 - 11 input sequences x 9 predicted features = 99 plots in PNG format
644 - ``sequence_01_agmata`` plot
645 - ``sequence_01_backbone`` plot
646 - ``sequence_01_coil`` plot
647 - ``sequence_01_disoMine`` plot
648 - ``sequence_01_earlyFolding`` plot
649 - ``sequence_01_helix`` plot
650 - ``sequence_01_ppII`` plot
651 - ``sequence_01_sheet`` plot
652 - ``sequence_01_sidechain`` plot
653 - ...
654 - ``sequence_11_sidechain`` plot
655
656 - Checking the **Plot all** parameter means that all the input sequences are plotted together so that their predicted features can be compared.
657 - 9 predicted features = 9 plots in PNG format
658 - **agmata plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``)
659 - **backbone plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``)
660 - **coil plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``)
661 - **disoMine plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``)
662 - **earlyFolding plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``)
663 - **helix plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``)
664 - **ppII plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``)
665 - **sheet plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``)
666 - **sidechain plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``)
667
668 About Bio2Byte
669 --------------
670
671 **We research the relation between protein sequence and biophysical behavior.**
672
673 Proteins are the molecular machines that make cells work.
674 They perform a wide variety of functions through interactions with each other and many additional molecules.
675 Traditionally, proteins are described in a single static state (a picture).
676 It is now increasingly recognized that many proteins can adopt multiple states and move between these conformational states dynamically (a movie).
677
678 We investigate how the dynamics, conformational states, and available experimental data of proteins relate to their amino acid sequence.
679 Underlying physical and chemical principles are computationally unraveled through data integration,
680 analysis and machine learning, thereby connecting them to biological events and improving our understanding of the way proteins work.
681
682 Visit our website for further information: https://bio2byte.be
683
684 About Bio2Byte tools
685 ''''''''''''''''''''
686 The software suite "Bio2Byte tools", known as ``b2btools``, offers a set of biophysical predictions for both single protein sequences and MSA input files.
687
688 **Useful links:**
689
690 ``b2bTools`` is also available on the *Python Package Index* (``PyPI``) at https://pypi.org/project/b2bTools/, as well as on *Bioconda* at https://bioconda.github.io/recipes/b2btools/README.html.
691 We also provide a set of online examples in Jupyter Notebook format, which can be run on the Google Colab platform, at https://github.com/Bio2Byte/public_notebooks.
692
693 ]]>
694 </help>
695 <creator> <!-- Credits for this tool: organizations first, then people; each person's identifier is an ORCID iD URI -->
696 <organization name="Bio2Byte" url="https://bio2byte.be" email="Wim.Vranken@vub.be"/>
697 <organization name="Vrije Universiteit Brussel" url="https://vub.be" alternateName="VUB"/>
698 <person honorificPrefix="Prof." givenName="Wim" familyName="Vranken" email="Wim.Vranken@vub.be" identifier="https://orcid.org/0000-0001-7470-4324"/>
699 <person givenName="Jose" familyName="Gavalda-Garcia" email="Jose.Gavalda.Garcia@vub.be" identifier="https://orcid.org/0000-0001-6431-3442"/>
700 <person givenName="Adrian" familyName="Diaz" email="Adrian.Diaz@vub.be" identifier="https://orcid.org/0000-0003-0165-1318"/>
701 </creator>
702 <citations> <!-- Publications to cite for this tool, given as DOIs; the "/" separator is written literally in every entry -->
703 <citation type="doi">10.1038/ncomms3741</citation>
704 <citation type="doi">10.1101/2020.05.25.115253</citation>
705 <citation type="doi">10.1038/s41598-017-08366-3</citation>
706 <citation type="doi">10.1093/bioinformatics/btz912</citation>
707 </citations>
708 </tool>