Mercurial > repos > recetox > ms2deepscore_training
comparison macros.xml @ 0:0a0529822d91 draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/ms2deepscore commit 4bd610e0cbbcbed51a6bfb880179777fc8034fd6
| author | recetox |
|---|---|
| date | Mon, 02 Sep 2024 12:12:30 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0a0529822d91 |
|---|---|
| 1 <macros> | |
| 2 <token name="@TOOL_VERSION@">2.0.0</token> | |
| 3 <token name="@ONNX_VERSION@">1.16.2</token> | |
| 4 | |
| 5 <xml name="creator"> | |
| 6 <creator> | |
| 7 <person | |
| 8 givenName="Zargham" | |
| 9 familyName="Ahmad" | |
| 10 url="https://github.com/zargham-ahmad" | |
| 11 identifier="0000-0002-6096-224X" /> | |
| 12 <organization | |
| 13 url="https://www.recetox.muni.cz/" | |
| 14 email="GalaxyToolsDevelopmentandDeployment@space.muni.cz" | |
| 15 name="RECETOX MUNI" /> | |
| 16 </creator> | |
| 17 </xml> | |
| 18 | |
| 19 <xml name="edam"> | |
| 20 <xrefs> | |
| 21 <xref type="bio.tools">ms2deepscore</xref> | |
| 22 </xrefs> | |
| 23 </xml> | |
| 24 | |
| 25 <xml name="input_param"> | |
| 26 <conditional name="scores"> | |
| 27 <param name="use_scores" label="Use Scores Object" type="select"> | |
| 28 <option value="False" selected="true">FALSE</option> | |
| 29 <option value="True">TRUE</option> | |
| 30 </param> | |
| 31 <when value="True"> | |
| 32 <param label="Scores object" name="scores_in" type="data" format="json" | |
| 33 help="Scores objects calculated previously using one of the matchms similarity tools." /> | |
| 34 </when> | |
| 35 <when value="False"> | |
| 36 <param label="Queries spectra" name="queries" type="data" format="msp" | |
| 37 help="Query mass spectra to match against references."/> | |
| 38 <param label="Reference spectra" name="references" type="data" format="msp" | |
| 39 help="Reference mass spectra to match against as library."/> | |
| 40 </when> | |
| 41 </conditional> | |
| 42 <param name="model" type="data" format="onnx" label="Model" | |
| 43 help="Select the trained MS2DeepScore model file in ONNX format, as created by the 'MS2DeepScore Training' tool."/> | |
| 44 <param name="model_param" type="data" format="json" label="Configuration" | |
| 45 help="Select the MS2DeepScore model configurations in JSON format. Can be created using the 'MS2DeepScore Config Generator' tool."/> | |
| 46 </xml> | |
| 47 | |
| 48 <xml name="training_param"> | |
| 49 <param label="Training Dataset" name="spectra" type="data" format="msp,mgf" | |
| 50 help="Spectra file that should be used for training (it will be split into training, validation and test sets)."/> | |
| 51 <param name="model_param" type="data" format="json" label="Model Settings" help="JSON file with the MS2DeepScore model settings."/> | |
| 52 <param name="validation_split_fraction" type="integer" min="0" max="100" value="20" label="Validation split fraction [%]" | |
| 53 help="The fraction of the inchikeys that will be used for validation and test"/> | |
| 54 </xml> | |
| 55 | |
| 56 <xml name="config_generator"> | |
| 57 <section name="model_structure" title="Model Structure" expanded="true"> | |
| 58 <repeat name="layers" title="Layer" min="1" default="1" > | |
| 59 <param name="dims" type="integer" label="Dimensions" min = "0" value="2000" help="Size of the in-between layer to add." /> | |
| 60 </repeat> | |
| 61 <param name="embedding_dim" type="integer" label="Embedding Dimension" value="400" help="The dimension of the final embedding layer." /> | |
| 62 <param name="ionisation_mode" type="select" label="Ionisation Mode"> | |
| 63 <option value="positive" selected="true">Positive</option> | |
| 64 <option value="negative">Negative</option> | |
| 65 <option value="both">Both</option> | |
| 66 </param> | |
| 67 </section> | |
| 68 | |
| 69 <section name="tensorization_settings" title="Tensorization Settings" expanded="true"> | |
| 70 <param name="min_mz" type="integer" label="Min m/z" value="10" /> | |
| 71 <param name="max_mz" type="integer" label="Max m/z" value="1000" /> | |
| 72 <param name="mz_bin_width" type="float" label="m/z Bin Width" value="0.1" /> | |
| 73 <param name="intensity_scaling" type="float" label="Intensity Scaling" value="0.5" /> | |
| 74 <param name="fingerprint_type" type="text" value="daylight" label="Fingerprint Type" help="The fingerprint type that should be used for tanimoto score calculations." /> | |
| 75 <param name="fingerprint_nbits" type="integer" label="Fingerprint Number of Bits" value="2048" help="The number of bits to use for the fingerprint." /> | |
| 76 </section> | |
| 77 | |
| 78 | |
| 79 <section name="training_settings" title="Training Settings" expanded="false"> | |
| 80 <param name="dropout_rate" type="float" label="Dropout Rate" value="0.0" /> | |
| 81 <param name="learning_rate" type="float" label="Learning Rate" value="0.00025" /> | |
| 82 <param name="epochs" type="integer" label="Epochs" value="250" /> | |
| 83 <param name="patience" type="integer" label="Patience" value="20" help="How long the model should keep training if validation does not improve" /> | |
| 84 <param name="loss_function" type="select" label="Loss Function"> | |
| 85 <option value="mse" selected="true">Mean Squared Error (mse)</option> | |
| 86 <option value="mae">Mean Absolute Error (mae)</option> | |
| 87 <option value="rmse">Root Mean Squared Error (rmse)</option> | |
| 88 <option value="risk_mae">Risk Aware MAE (risk_aware_mae)</option> | |
| 89 <option value="risk_mse">Risk Aware MSE (risk_aware_mse)</option> | |
| 90 </param> | |
| 91 <param name="weighting_factor" type="integer" label="Weighting Factor" value="0" /> | |
| 92 <param name="batch_size" type="integer" value="32" label="Batch Size" help="Number of pairs per batch" /> | |
| 93 <param name="average_pairs_per_bin" type="integer" value="20" label="Average pairs per bin" help="The aimed average number of pairs of spectra per spectrum in each bin." /> | |
| 94 <param name="random_seed" type="text" label="Random seed" value="None" help="Specify random seed for reproducible random number generation." /> | |
| 95 </section> | |
| 96 </xml> | |
| 97 | |
| 98 <xml name="citations"> | |
| 99 <citations> | |
| 100 <citation type="doi">10.1186/s13321-021-00558-4</citation> | |
| 101 <citation type="doi">10.1101/2024.03.25.586580</citation> | |
| 102 </citations> | |
| 103 </xml> | |
| 104 | |
| 105 | |
| 106 <token name="@HELP@"> | |
| 107 ms2deepscore provides a Siamese neural network that is trained to predict molecular structural similarities (Tanimoto scores) from pairs of mass spectrometry spectra. | |
| 108 The library provides intuitive classes to prepare data, train a Siamese model, and compute similarities between pairs of spectra. | |
| 109 In addition to the prediction of a structural similarity, MS2DeepScore can also make use of Monte-Carlo dropout to assess the model uncertainty. | |
| 110 MS2DeepScore is able to identify highly-reliable structural matches and to predict Tanimoto scores for pairs of molecules based on their fragment spectra with a root mean squared error of about 0.15. | |
| 111 Furthermore, the prediction uncertainty estimate can be used to select a subset of predictions with a root mean squared error of about 0.1. | |
| 112 MS2DeepScore can also be used to create chemically meaningful mass spectral embeddings that could be used to cluster large numbers of spectra. | |
| 113 </token> | |
| 114 | |
| 115 | |
| 116 <token name="@init_scores@"> | |
| 117 from matchms.importing import load_from_msp, scores_from_json | |
| 118 from matchms import Scores | |
| 119 #if $scores.use_scores == "True" | |
| 120 scores = scores_from_json("${scores_in}") | |
| 121 #else | |
| 122 scores = Scores(references=list(load_from_msp("$references")), queries=list(load_from_msp("$queries")), is_symmetric=False) | |
| 123 #end if | |
| 124 </token> | |
| 125 | |
| 126 <token name="@init_logger@"> | |
| 127 from matchms import set_matchms_logger_level | |
| 128 set_matchms_logger_level("WARNING") | |
| 129 </token> | |
| 130 | |
| 131 <token name="@json_load@"> | |
| 132 import numpy as np | |
| 133 import json | |
| 134 | |
| 135 with open("$model_param", 'r') as json_file: | |
| 136 model_params = json.load(json_file) | |
| 137 | |
| 138 # Conditionally convert specific keys if they are present | |
| 139 if 'base_dims' in model_params: | |
| 140 model_params['base_dims'] = tuple(model_params['base_dims']) | |
| 141 | |
| 142 if 'same_prob_bins' in model_params: | |
| 143 model_params['same_prob_bins'] = np.array(model_params['same_prob_bins']) | |
| 144 | |
| 145 if 'additional_metadata' in model_params: | |
| 146 model_params['additional_metadata'] = [ | |
| 147 (entry[0], entry[1]) for entry in model_params['additional_metadata'] | |
| 148 ] | |
| 149 </token> | |
| 150 </macros> |
