Mercurial > repos > iuc > b2btools_single_sequence
diff b2btools_single_sequence.xml @ 0:b694a77ca1e8 draft default tip
planemo upload commit 599e1135baba020195b3f7576449d595bca9af75
author | iuc |
---|---|
date | Tue, 09 Aug 2022 12:30:52 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/b2btools_single_sequence.xml Tue Aug 09 12:30:52 2022 +0000 @@ -0,0 +1,708 @@ +<tool + id="b2btools_single_sequence" + name="b2bTools: Biophysical predictors for single sequences" + version="3.0.5+galaxy0" + license="GPL-3.0" + python_template_version="3.5" + profile="21.05"> + <description>from their amino-acid sequences</description> + <xrefs> + <xref type="bio.tools">b2btools</xref> + </xrefs> + <requirements> + <requirement type="package" version="3.0.5">b2btools</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ +mkdir -p ./tabular ./plots && +python '$__tool_directory__/script.py' --file '$input' --output ./tabular --json '$predictions_output' + $section_predictors.dynamine + $section_predictors.disomine + $section_predictors.efoldmine + $section_predictors.agmata +#if $section_plot.plot == '--plot' or $section_plot.plot_all == '--plot_all': + --plot-output ./plots +#end if + $section_plot.plot + $section_plot.plot_all + $section_plot.highlight + ]]></command> + <inputs> + <param type="data" name="input" format="fasta" label="Protein sequences in FASTA format" help="FASTA file of protein sequences"/> + <section name="section_predictors" title="Biophyisical predictors" help="Configure this section to select the predictions to be executed"> + <param + argument="--dynamine" + type="boolean" + checked="true" + truevalue="--dynamine" + falsevalue="" + label="DynaMine: Prediction of protein backbone dynamics from sequence only" + help="Fast predictor of protein backbone dynamics using only sequence information as input. The version here also predicts side-chain dynamics and secondary structure predictors using the same principle." 
/> + <param + argument="--disomine" + type="boolean" + checked="true" + truevalue="--disomine" + falsevalue="" + label="DisoMine: Prediction of protein disorder from sequence only" + help="Predicts protein disorder with recurrent neural networks not directly from the amino acid sequence, but instead from more generic predictions of key biophysical properties, here protein dynamics, secondary structure and early folding."/> + <param + argument="--efoldmine" + type="boolean" + checked="true" + truevalue="--efoldmine" + falsevalue="" + label="EFoldMine: Prediction of protein early folding regions from sequence only" + help="Predicts from the primary amino acid sequence of a protein, which amino acids are likely involved in early folding events."/> + <param + argument="--agmata" + type="boolean" + checked="true" + truevalue="--agmata" + falsevalue="" + label="Agmata: Prediction of protein regions that are likely to cause beta-aggregation" + help="Agmata is a single-sequence based predictor of protein regions that are likely to cause beta-aggregation. 
It is based on a model that uses the biophysical predictions of protein behaviour, not on amino acid codes directly."/> + </section> + <section name="section_plot" title="Plot options" help="Configure plot output"> + <param + name="plot" + argument="--plot" + type="boolean" + label="Plot predicted values by sequence" + truevalue="--plot" + falsevalue="" + help="This option plots predicted values in different files"/> + <param + name="plot_all" + argument="--plot_all" + type="boolean" + label="Plot all sequences together" + truevalue="--plot_all" + falsevalue="" + help="This option plots all sequences together in order to compare predicted values of different sequences"/> + <param + name="highlight" + argument="--highlight" + type="boolean" + label="Highlight regions of interest" + truevalue="--highlight" + falsevalue="" + help="Highlight biophysical regions on the background of the plots"/> + </section> + </inputs> + <outputs> + <data name="predictions_output" label="Predictions in JSON format" format="json" /> + <collection name="split_output" type="list" label="Tabular predictions by sequence"> + <discover_datasets pattern="__name_and_ext__" format="tabular" directory="tabular" visible="true" /> + </collection> + <collection name="split_output_plots" type="list" label="Plots"> + <discover_datasets pattern="__name_and_ext__" format="png" directory="plots" visible="true" /> + </collection> + </outputs> + <tests> + <!-- Test 1: All the predictors were selected, plotting both individual and aggregated charts --> + <test expect_exit_code="0" expect_num_outputs="3"> + <param name="input" value="input.fasta" ftype="fasta"/> + <section name="section_predictors"> + <param name="dynamine" value="true"/> + <param name="disomine" value="true"/> + <param name="efoldmine" value="true"/> + <param name="agmata" value="true"/> + </section> + <section name="section_plot"> + <param name="plot" value="true"/> + <param name="plot_all" value="true"/> + <param name="highlight" 
value="true"/> + </section> + <assert_command> + <has_text text="--dynamine" /> + <has_text text="--disomine" /> + <has_text text="--agmata" /> + <has_text text="--efoldmine" /> + <has_text text="--json" /> + <has_text text="--plot_all" /> + <has_text text="--highlight" /> + <has_text text="--highlight" /> + <has_text text="--output" /> + <has_text text="--plot-output" /> + </assert_command> + <output name="predictions_output" value="test_output.json" ftype="json"/> + <!-- 11 sequences = 11 TSV files --> + <output_collection name="split_output" type="list" count="11"> + <element name="random_sequence_01_consisting_of_40_residues" file="random_sequence_01_consisting_of_40_residues.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_02_consisting_of_40_residues" file="random_sequence_02_consisting_of_40_residues.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_03_consisting_of_30_residues" file="random_sequence_03_consisting_of_30_residues.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_04_consisting_of_40_residues" file="random_sequence_04_consisting_of_40_residues.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_05_consisting_of_30_residues" file="random_sequence_05_consisting_of_30_residues.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_06_consisting_of_45_residues" file="random_sequence_06_consisting_of_45_residues.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_07_consisting_of_30_residues" file="random_sequence_07_consisting_of_30_residues.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_08_consisting_of_40_residues" file="random_sequence_08_consisting_of_40_residues.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_09_consisting_of_30_residues" file="random_sequence_09_consisting_of_30_residues.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_10_consisting_of_65_residues" 
file="random_sequence_10_consisting_of_65_residues.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_11_consisting_of_30_residues" file="random_sequence_11_consisting_of_30_residues.tsv" ftype="tsv" compare="diff"/> + </output_collection> + <!-- 9 predicted values x 11 sequences + 9 predicted values aggregated = (99 + 9) charts = 108 charts --> + <output_collection name="split_output_plots" type="list" count="108" /> + </test> + <!-- Test 2: Only one predictor was selected, plotting both individual and aggregated charts --> + <test expect_exit_code="0" expect_num_outputs="3"> + <param name="input" value="input.fasta" ftype="fasta"/> + <section name="section_predictors"> + <param name="dynamine" value="true"/> + <param name="disomine" value="false"/> + <param name="efoldmine" value="false"/> + <param name="agmata" value="false"/> + </section> + <section name="section_plot"> + <param name="plot" value="true"/> + <param name="plot_all" value="true"/> + <param name="highlight" value="true"/> + </section> + <assert_command> + <has_text text="--dynamine" /> + <not_has_text text="--disomine" /> + <not_has_text text="--agmata" /> + <not_has_text text="--efoldmine" /> + <has_text text="--json" /> + <has_text text="--plot " /> + <has_text text="--plot_all" /> + <has_text text="--highlight" /> + <has_text text="--output" /> + <has_text text="--plot-output" /> + </assert_command> + <output name="predictions_output" value="test_output_dynamine.json" ftype="json"/> + <!-- 11 sequences = 11 TSV files --> + <output_collection name="split_output" type="list" count="11"> + <element name="random_sequence_01_consisting_of_40_residues" file="random_sequence_01_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_02_consisting_of_40_residues" file="random_sequence_02_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_03_consisting_of_30_residues" 
file="random_sequence_03_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_04_consisting_of_40_residues" file="random_sequence_04_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_05_consisting_of_30_residues" file="random_sequence_05_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_06_consisting_of_45_residues" file="random_sequence_06_consisting_of_45_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_07_consisting_of_30_residues" file="random_sequence_07_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_08_consisting_of_40_residues" file="random_sequence_08_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_09_consisting_of_30_residues" file="random_sequence_09_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_10_consisting_of_65_residues" file="random_sequence_10_consisting_of_65_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_11_consisting_of_30_residues" file="random_sequence_11_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + </output_collection> + <!-- 6 predicted values x 11 sequences + 6 predicted values aggregated = (66 + 6) charts = 72 charts --> + <output_collection name="split_output_plots" type="list" count="72" /> + </test> + <!-- Test 3: Only one predictor was selected, plotting individual charts --> + <test expect_exit_code="0" expect_num_outputs="3"> + <param name="input" value="input.fasta" ftype="fasta"/> + <section name="section_predictors"> + <param name="dynamine" value="true"/> + <param name="disomine" value="false"/> + <param name="efoldmine" value="false"/> + <param name="agmata" value="false"/> + </section> + <section name="section_plot"> + 
<param name="plot" value="true"/> + <param name="plot_all" value="false"/> + <param name="highlight" value="true"/> + </section> + <assert_command> + <has_text text="--dynamine" /> + <not_has_text text="--disomine" /> + <not_has_text text="--agmata" /> + <not_has_text text="--efoldmine" /> + <has_text text="--json" /> + <has_text text="--plot " /> + <not_has_text text="--plot_all" /> + <has_text text="--highlight" /> + <has_text text="--output" /> + <has_text text="--plot-output" /> + </assert_command> + <output name="predictions_output" value="test_output_dynamine.json" ftype="json"/> + <!-- 11 sequences = 11 TSV files --> + <output_collection name="split_output" type="list" count="11"> + <element name="random_sequence_01_consisting_of_40_residues" file="random_sequence_01_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_02_consisting_of_40_residues" file="random_sequence_02_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_03_consisting_of_30_residues" file="random_sequence_03_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_04_consisting_of_40_residues" file="random_sequence_04_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_05_consisting_of_30_residues" file="random_sequence_05_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_06_consisting_of_45_residues" file="random_sequence_06_consisting_of_45_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_07_consisting_of_30_residues" file="random_sequence_07_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_08_consisting_of_40_residues" file="random_sequence_08_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element 
name="random_sequence_09_consisting_of_30_residues" file="random_sequence_09_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_10_consisting_of_65_residues" file="random_sequence_10_consisting_of_65_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_11_consisting_of_30_residues" file="random_sequence_11_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + </output_collection> + <!-- 6 predicted values x 11 sequences = 66 charts --> + <output_collection name="split_output_plots" type="list" count="66" /> + </test> + <!-- Test 4: Only one predictor was selected, plotting aggregated charts --> + <test expect_exit_code="0" expect_num_outputs="3"> + <param name="input" value="input.fasta" ftype="fasta"/> + <section name="section_predictors"> + <param name="dynamine" value="true"/> + <param name="disomine" value="false"/> + <param name="efoldmine" value="false"/> + <param name="agmata" value="false"/> + </section> + <section name="section_plot"> + <param name="plot" value="false"/> + <param name="plot_all" value="true"/> + <param name="highlight" value="true"/> + </section> + <assert_command> + <has_text text="--dynamine" /> + <not_has_text text="--disomine" /> + <not_has_text text="--agmata" /> + <not_has_text text="--efoldmine" /> + <has_text text="--json" /> + <has_text text="--plot_all" /> + <has_text text="--highlight" /> + <not_has_text text="--plot " /> + <has_text text="--output" /> + <has_text text="--plot-output" /> + </assert_command> + <output name="predictions_output" value="test_output_dynamine.json" ftype="json"/> + <!-- 11 sequences = 11 TSV files --> + <output_collection name="split_output" type="list" count="11"> + <element name="random_sequence_01_consisting_of_40_residues" file="random_sequence_01_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_02_consisting_of_40_residues" 
file="random_sequence_02_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_03_consisting_of_30_residues" file="random_sequence_03_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_04_consisting_of_40_residues" file="random_sequence_04_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_05_consisting_of_30_residues" file="random_sequence_05_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_06_consisting_of_45_residues" file="random_sequence_06_consisting_of_45_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_07_consisting_of_30_residues" file="random_sequence_07_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_08_consisting_of_40_residues" file="random_sequence_08_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_09_consisting_of_30_residues" file="random_sequence_09_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_10_consisting_of_65_residues" file="random_sequence_10_consisting_of_65_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_11_consisting_of_30_residues" file="random_sequence_11_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + </output_collection> + <!-- 6 predicted values aggregated = 6 charts --> + <output_collection name="split_output_plots" type="list" count="6" /> + </test> + <!-- Test 5: Only one predictor was selected, plotting aggregated charts with no highlighting --> + <test expect_exit_code="0" expect_num_outputs="3"> + <param name="input" value="input.fasta" ftype="fasta"/> + <section name="section_predictors"> + <param name="dynamine" value="true"/> + <param name="disomine" value="false"/> + 
<param name="efoldmine" value="false"/> + <param name="agmata" value="false"/> + </section> + <section name="section_plot"> + <param name="plot" value="false"/> + <param name="plot_all" value="true"/> + </section> + <assert_command> + <has_text text="--dynamine" /> + <has_text text="--json" /> + <has_text text="--plot_all" /> + <has_text text="--output" /> + <has_text text="--plot-output" /> + <not_has_text text="--disomine" /> + <not_has_text text="--agmata" /> + <not_has_text text="--efoldmine" /> + <not_has_text text="--highlight" /> + <not_has_text text="--plot " /> + </assert_command> + <output name="predictions_output" value="test_output_dynamine.json" ftype="json"/> + <!-- 11 sequences = 11 TSV files --> + <output_collection name="split_output" type="list" count="11"> + <element name="random_sequence_01_consisting_of_40_residues" file="random_sequence_01_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_02_consisting_of_40_residues" file="random_sequence_02_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_03_consisting_of_30_residues" file="random_sequence_03_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_04_consisting_of_40_residues" file="random_sequence_04_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_05_consisting_of_30_residues" file="random_sequence_05_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_06_consisting_of_45_residues" file="random_sequence_06_consisting_of_45_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_07_consisting_of_30_residues" file="random_sequence_07_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_08_consisting_of_40_residues" 
file="random_sequence_08_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_09_consisting_of_30_residues" file="random_sequence_09_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_10_consisting_of_65_residues" file="random_sequence_10_consisting_of_65_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_11_consisting_of_30_residues" file="random_sequence_11_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + </output_collection> + <!-- 6 predicted values aggregated = 6 charts --> + <output_collection name="split_output_plots" type="list" count="6" /> + </test> + <!-- Test 6: Only one predictor was selected, plotting was disabled --> + <test expect_exit_code="0" expect_num_outputs="3"> + <param name="input" value="input.fasta" ftype="fasta"/> + <section name="section_predictors"> + <param name="dynamine" value="true"/> + <param name="disomine" value="false"/> + <param name="efoldmine" value="false"/> + <param name="agmata" value="false"/> + </section> + <section name="section_plot"> + <param name="plot" value="false"/> + <param name="plot_all" value="false"/> + <param name="highlight" value="false"/> + </section> + <assert_command> + <has_text text="--dynamine" /> + <has_text text="--json" /> + <has_text text="--output" /> + <not_has_text text="--disomine" /> + <not_has_text text="--agmata" /> + <not_has_text text="--efoldmine" /> + <not_has_text text="--plot " /> + <not_has_text text="--plot_all" /> + <not_has_text text="--highlight" /> + <not_has_text text="--plot-output" /> + </assert_command> + <output name="predictions_output" value="test_output_dynamine.json" ftype="json"/> + <!-- 11 sequences = 11 TSV files --> + <output_collection name="split_output" type="list" count="11"> + <element name="random_sequence_01_consisting_of_40_residues" file="random_sequence_01_consisting_of_40_residues_dynamine.tsv" 
ftype="tsv" compare="diff"/> + <element name="random_sequence_02_consisting_of_40_residues" file="random_sequence_02_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_03_consisting_of_30_residues" file="random_sequence_03_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_04_consisting_of_40_residues" file="random_sequence_04_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_05_consisting_of_30_residues" file="random_sequence_05_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_06_consisting_of_45_residues" file="random_sequence_06_consisting_of_45_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_07_consisting_of_30_residues" file="random_sequence_07_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_08_consisting_of_40_residues" file="random_sequence_08_consisting_of_40_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_09_consisting_of_30_residues" file="random_sequence_09_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_10_consisting_of_65_residues" file="random_sequence_10_consisting_of_65_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + <element name="random_sequence_11_consisting_of_30_residues" file="random_sequence_11_consisting_of_30_residues_dynamine.tsv" ftype="tsv" compare="diff"/> + </output_collection> + <output_collection name="split_output_plots" type="list" count="0" /> + </test> + <!-- Test 7: No predictor selected, it must fail --> + <test expect_failure="true" expect_exit_code="2"> + <param name="input" value="input.fasta" ftype="fasta"/> + <section name="section_predictors"> + <param name="dynamine" value="false"/> + <param name="disomine" value="false"/> + <param 
name="efoldmine" value="false"/> + <param name="agmata" value="false"/> + </section> + <section name="section_plot"> + <param name="plot" value="false"/> + <param name="plot_all" value="false"/> + <param name="highlight" value="false"/> + </section> + <assert_command> + <not_has_text text="--dynamine" /> + <not_has_text text="--disomine" /> + <not_has_text text="--agmata" /> + <not_has_text text="--efoldmine" /> + <not_has_text text="--plot-output" /> + <not_has_text text="--plot " /> + <not_has_text text="--plot_all" /> + <not_has_text text="--highlight" /> + </assert_command> + </test> + <!-- Test 8: No input file, it must fail --> + <test expect_failure="true" expect_exit_code="1"> + <section name="section_predictors"> + <param name="dynamine" value="true"/> + <param name="disomine" value="false"/> + <param name="efoldmine" value="false"/> + <param name="agmata" value="false"/> + </section> + <section name="section_plot"> + <param name="plot" value="false"/> + <param name="plot_all" value="false"/> + <param name="highlight" value="false"/> + </section> + <assert_command> + <has_text text="--dynamine" /> + <not_has_text text="--disomine" /> + <not_has_text text="--agmata" /> + <not_has_text text="--efoldmine" /> + <not_has_text text="--plot-output" /> + <not_has_text text="--plot " /> + <not_has_text text="--plot_all" /> + <not_has_text text="--highlight" /> + </assert_command> + </test> + <!-- Test 9: No valid input file format, it must fail --> + <test expect_failure="true" expect_exit_code="1"> + <param name="input" value="wrong.fasta" ftype="fasta"/> + <section name="section_predictors"> + <param name="dynamine" value="true"/> + <param name="disomine" value="false"/> + <param name="efoldmine" value="false"/> + <param name="agmata" value="false"/> + </section> + <section name="section_plot"> + <param name="plot" value="false"/> + <param name="plot_all" value="false"/> + <param name="highlight" value="false"/> + </section> + <assert_command> + <has_text 
text="--dynamine" /> + <not_has_text text="--disomine" /> + <not_has_text text="--agmata" /> + <not_has_text text="--efoldmine" /> + <not_has_text text="--plot-output" /> + <not_has_text text="--plot " /> + <not_has_text text="--plot_all" /> + <not_has_text text="--highlight" /> + </assert_command> + </test> + <!-- Test 10: Wrong input file type, it must fail --> + <test expect_failure="true" expect_exit_code="1"> + <param name="input" value="wrong.tsv" ftype="tabular"/> + <section name="section_predictors"> + <param name="dynamine" value="true"/> + <param name="disomine" value="false"/> + <param name="efoldmine" value="false"/> + <param name="agmata" value="false"/> + </section> + <section name="section_plot"> + <param name="plot" value="false"/> + <param name="plot_all" value="false"/> + <param name="highlight" value="false"/> + </section> + <assert_command> + <has_text text="--dynamine" /> + <not_has_text text="--disomine" /> + <not_has_text text="--agmata" /> + <not_has_text text="--efoldmine" /> + <not_has_text text="--plot-output" /> + <not_has_text text="--plot " /> + <not_has_text text="--plot_all" /> + <not_has_text text="--highlight" /> + </assert_command> + </test> + </tests> + <help><![CDATA[ +Single protein sequence analysis +-------------------------------- +The **current Galaxy Tool** is the implementation of the Bio2Byte tools for **single protein sequence analysis**. +It means that this tool receives amino-acid sequences in FASTA format and +will return the biophysical predictions depending on the predictors selected. + +Input sequences +''''''''''''''' +The tool works with either single or multiple input files, just keeping in mind that +the file must be a valid FASTA containing at least one sequence of amino acids. + +There is no limitation on the number of sequences per file, however, all +the sequences must have at least 5 residues. 
+ +Available biophysical predictors +'''''''''''''''''''''''''''''''' +This is the list of the available Bio2Byte tools. As you can see, each prediction tool provides a subset of +predicted features; for instance, DynaMine produces six features, which appear as columns in the tabular output and as keys in the JSON output: + +- **DynaMine**: Fast predictor of protein backbone dynamics using only sequence information as input. The version here also predicts side-chain dynamics and secondary structure predictors using the same principle. + - *Backbone dynamics*: found as ``backbone`` inside either the tabular result, the JSON output file, or the plots in PNG format. + - *Side chain dynamics*: found as ``sidechain`` inside either the tabular result, the JSON output file, or the plots in PNG format. + - *Helix propensity*: found as ``helix`` inside either the tabular result, the JSON output file, or the plots in PNG format. + - *Sheet propensity*: found as ``sheet`` inside either the tabular result, the JSON output file, or the plots in PNG format. + - *Coil propensity*: found as ``coil`` inside either the tabular result, the JSON output file, or the plots in PNG format. + - *Polyproline-II propensity*: found as ``ppII`` inside either the tabular result, the JSON output file, or the plots in PNG format. +- **DisoMine**: Predicts protein disorder with recurrent neural networks not directly from the amino acid sequence, but instead from more generic predictions of key biophysical properties, here protein dynamics, secondary structure, and early folding. + - *Disorder*: found as ``disoMine`` inside either the tabular result, the JSON output file, or the plots in PNG format. +- **EFoldMine**: Predicts from the primary amino acid sequence of a protein, which amino acids are likely involved in early folding events. + - *Early folding*: found as ``earlyFolding`` inside either the tabular result, the JSON output file, or the plots in PNG format. 
+- **Agmata**: Predicts protein regions that are likely to cause beta-aggregation, using biophysical predictions of protein behaviour rather than the amino acid codes directly. + - *Beta-sheet aggregation*: found as ``agmata`` inside either the tabular result, the JSON output file, or the plots in PNG format. + +Expected results +'''''''''''''''' +From a single FASTA file with N sequences, the process will produce: + +- **single JSON output**: it will contain a key per sequence defined in the input FASTA file. Each key contains the predicted features for that sequence. +- **N tabular results**: Each tabular result is a table where the columns are the predicted features and the rows are the residues of the sequence. +- **per-sequence plots** (in case the user checked the ``plot`` parameter): Each predicted feature is plotted separately for each of the N sequences. +- **predicted feature plots** (in case the user checked the ``plot_all`` parameter): For each predicted feature, there will be a plot containing a series per sequence, so all the sequences are plotted in one single PNG file for each predicted feature. + +Example +~~~~~~~ + +Given this input in FASTA format with 11 sequences: +:: + + >random_sequence_01_consisting_of_40_residues + MDRHDPVQKSMMMDRHDPVQKMDRHDPVQKSDRHDPVQKS + + >random_sequence_02_consisting_of_40_residues + MWSMWRAMWSSQRAMWSMWRAMWSMSQRAMWSMWRAMWSM + + >random_sequence_03_consisting_of_30_residues + YSWTHYELKAVWCELTYWRSWTHYELKAVV + + >random_sequence_04_consisting_of_40_residues + SWTHYEYSWTHYELKAVWCELTYWRSWTHYELKAVVLKAV + + >random_sequence_05_consisting_of_30_residues + NCPIEHHLCANKMDLHHHHLCAHHLPEDQY + + >random_sequence_06_consisting_of_45_residues + YACLFQKPYIHHLCANKMDLHHNKMDLHHHHLCAHHLHHLCAHHL + + >random_sequence_07_consisting_of_30_residues + FHHLCANKMDLHHHHLCAHHLVPGKQEPDS + + >random_sequence_08_consisting_of_40_residues + HHLCANKMDLHHHHLCAHHLCANKMDLNKMDLLCANKMDL + + >random_sequence_09_consisting_of_30_residues + GNKTPFMKMHGGNKTPFMKMHNKTPFMKMH + + >random_sequence_10_consisting_of_65_residues + 
LDNSKMWQLDNPMPMSKMWQLDNSKMWQLDNLDNSKMWQLDNPMPMSKMWQLDNSKMWQLDNAAA + + >random_sequence_11_consisting_of_30_residues + PMSKMWQLDNMSKMWQLDNPMSKMWQLDNA + +The following results are produced when all the predictor tools (DynaMine, DisoMine, EFoldMine, and Agmata) are selected: + +JSON result +........... +It contains all the input sequences and their predicted features, depending on the selected predictors. +:: + + { + "random_sequence_03_consisting_of_30_residues": { + "agmata": [ + 0.012, + 0.08, + 0.119, + 0.154, + 0.2, + 0.34, + 1.023, + 4.084, + 12.328, + 20.723, + 25.866, + 24.864, + 17.509, + 9.32, + 3.591, + 1.559, + 0.64, + 0.221, + 0.084, + 0.038, + 0.031, + 0.035, + 0.052, + 0.159, + 1.115, + 7.747, + 8.329, + 8.214, + 7.259, + 0.608 + ], + "backbone": [ + 0.818, + 0.833, + 0.87, + ... + ], + ... + }, + ... + } + +Tabular results +............... +There will be one tabular file (.tsv) for each sequence, where each row is a residue of the sequence and the columns are the predicted features, depending on the selected predictors. 
+ +:: + + residue_index residue agmata backbone coil disoMine earlyFolding helix ppII sheet sidechain + 0 Y 0.012 0.818 0.376 0.783 0.031 0.262 0.074 0.295 0.624 + 1 S 0.08 0.833 0.373 0.818 0.05 0.253 0.07 0.33 0.59 + 2 W 0.119 0.87 0.361 0.772 0.061 0.27 0.057 0.341 0.696 + 3 T 0.154 0.906 0.336 0.641 0.064 0.334 0.037 0.341 0.665 + 4 H 0.2 0.945 0.295 0.596 0.099 0.421 0.017 0.324 0.673 + 5 Y 0.34 0.969 0.261 0.583 0.15 0.47 0.015 0.339 0.644 + 6 E 1.023 0.967 0.251 0.534 0.187 0.494 0.024 0.354 0.438 + 7 L 4.084 0.959 0.247 0.441 0.291 0.481 0.025 0.382 0.677 + 8 K 12.328 0.972 0.237 0.355 0.294 0.464 0.024 0.429 0.449 + 9 A 20.723 0.974 0.239 0.273 0.274 0.442 0.026 0.436 0.685 + 10 V 25.866 0.984 0.233 0.231 0.336 0.415 0.028 0.45 0.684 + 11 W 24.864 1.016 0.222 0.193 0.408 0.412 0.023 0.471 0.722 + 12 C 17.51 1.014 0.229 0.174 0.426 0.396 0.016 0.479 0.747 + 13 E 9.32 0.997 0.241 0.155 0.261 0.434 0.016 0.456 0.403 + 14 L 3.591 0.99 0.255 0.142 0.259 0.443 0.012 0.422 0.704 + 15 T 1.559 0.977 0.277 0.138 0.278 0.45 0.019 0.379 0.72 + 16 Y 0.64 0.969 0.299 0.134 0.282 0.428 0.025 0.353 0.656 + 17 W 0.221 0.976 0.317 0.135 0.262 0.415 0.027 0.328 0.704 + 18 R 0.084 0.957 0.331 0.132 0.147 0.41 0.029 0.295 0.462 + 19 S 0.038 0.956 0.342 0.148 0.142 0.393 0.028 0.289 0.627 + 20 W 0.031 0.947 0.355 0.182 0.178 0.379 0.031 0.304 0.71 + 21 T 0.035 0.964 0.333 0.205 0.187 0.393 0.023 0.305 0.705 + 22 H 0.052 0.976 0.286 0.228 0.19 0.451 0.013 0.309 0.663 + 23 Y 0.159 0.967 0.268 0.229 0.185 0.503 0.016 0.296 0.635 + 24 E 1.115 0.949 0.26 0.226 0.187 0.528 0.022 0.319 0.385 + 25 L 7.747 0.929 0.243 0.215 0.261 0.519 0.023 0.358 0.649 + 26 K 8.329 0.924 0.225 0.253 0.247 0.487 0.023 0.405 0.399 + 27 A 8.214 0.906 0.22 0.313 0.25 0.452 0.031 0.451 0.606 + 28 V 7.259 0.89 0.218 0.405 0.076 0.443 0.042 0.455 0.607 + 29 V 0.608 0.871 0.242 0.605 0.029 0.418 0.051 0.426 0.596 + +Plots +..... 
+ +Depending on the options selected, there will be a plot (built using ``matplotlib``) +per prediction per sequence and/or a plot per prediction for all the sequences together. + +- Checking the **Plot** parameter means that the predicted features are plotted in separate files, one set per input sequence. + - 11 input sequences x 9 predicted features = 99 plots in PNG format + - ``sequence_01_agmata`` plot + - ``sequence_01_backbone`` plot + - ``sequence_01_coil`` plot + - ``sequence_01_disoMine`` plot + - ``sequence_01_earlyFolding`` plot + - ``sequence_01_helix`` plot + - ``sequence_01_ppII`` plot + - ``sequence_01_sheet`` plot + - ``sequence_01_sidechain`` plot + - ... + - ``sequence_11_sidechain`` plot + +- Checking the **Plot all** parameter means that all the input sequences are plotted together so that their predicted features can be compared. + - 9 predicted features = 9 plots in PNG format + - **agmata plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``) + - **backbone plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``) + - **coil plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``) + - **disoMine plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``) + - **earlyFolding plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``) + - **helix plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``) + - **ppII plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``) + - **sheet plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``) + - **sidechain plot** containing 11 series (``sequence_01``, ``sequence_02``, ..., ``sequence_11``) + +About Bio2Byte +-------------- + +**We research the relation between protein sequence and biophysical behavior.** + +Proteins are the molecular machines that make cells work. 
+They perform a wide variety of functions through interactions with each other and many additional molecules. +Traditionally, proteins are described in a single static state (a picture). +It is now increasingly recognized that many proteins can adopt multiple states and move between these conformational states dynamically (a movie). + +We investigate how the dynamics, conformational states, and available experimental data of proteins relate to their amino acid sequence. +Underlying physical and chemical principles are computationally unraveled through data integration, +analysis and machine learning, thereby connecting them to biological events and improving our understanding of the way proteins work. + +Visit our website for further information: https://bio2byte.be + +About Bio2Byte tools +'''''''''''''''''''' +The software suite "Bio2Byte tools", known as ``b2btools``, offers a set of biophysical predictions for both single protein sequences and MSA input files. + +**Useful links:** + +``b2btools`` is also available on the *Python Package Index* (``PyPI``) https://pypi.org/project/b2bTools/, as well as on *Bioconda* https://bioconda.github.io/recipes/b2btools/README.html. +We also provide a set of online examples in Jupyter Notebook format that can be run on the Google Colab platform: https://github.com/Bio2Byte/public_notebooks. + + ]]> + </help> + <creator> + <organization name="bio2Byte" url="https://bio2byte.be" email="Wim.Vranken@vub.be"/> + <organization name="Vrije Universiteit Brussel" url="https://vub.be" alternateName="VUB"/> + <person honorificPrefix="Prof." 
givenName="Wim" familyName="Vranken" email="Wim.Vranken@vub.be" identifier="http://orcid.org/0000-0001-7470-4324" /> + <person givenName="Jose" familyName="Gavalda-Garcia" email="Jose.Gavalda.Garcia@vub.be" identifier="http://orcid.org/0000-0001-6431-3442" /> + <person givenName="Adrian" familyName="Diaz" email="Adrian.Diaz@vub.be" identifier="http://orcid.org/0000-0003-0165-1318" /> + </creator> + <citations> + <citation type="doi">10.1038/ncomms3741</citation> + <citation type="doi">10.1101/2020.05.25.115253</citation> + <citation type="doi">10.1038/s41598-017-08366-3</citation> + <citation type="doi">10.1093/bioinformatics/btz912</citation> + </citations> +</tool>