comparison macros.xml @ 2:472dc85ce7c5 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 506df2aef355b3791567283e1a175914f06b405a
author recetox
date Mon, 13 Feb 2023 10:28:35 +0000
parents 067a308223e3
children c69a12bfc2fb
comparison
equal deleted inserted replaced
1:f9fb9d8fb710 2:472dc85ce7c5
1 <macros> 1 <macros>
2 <token name="@TOOL_VERSION@">0.9.4</token> 2 <token name="@TOOL_VERSION@">0.10.1</token>
3 <xml name="requirements"> 3 <xml name="requirements">
4 <requirements> 4 <requirements>
5 <requirement type="package" version="4.1.0">r-base</requirement>
6 <requirement type="package" version="4.0.1">r-arrow</requirement>
7 <requirement type="package" version="@TOOL_VERSION@">r-recetox-aplcms</requirement> 5 <requirement type="package" version="@TOOL_VERSION@">r-recetox-aplcms</requirement>
8 <requirement type="package" version="1.0.7">r-dplyr</requirement> 6 <requirement type="package" version="2.5.2">pymzml</requirement>
9 </requirements> 7 </requirements>
10 </xml> 8 </xml>
11 9
12 <xml name="creator"> 10 <xml name="creator">
13 <creator> 11 <creator>
29 <person 27 <person
30 givenName="Jiří" 28 givenName="Jiří"
31 familyName="Novotný" 29 familyName="Novotný"
32 url="https://github.com/xtracko" 30 url="https://github.com/xtracko"
33 identifier="0000-0001-5449-3523" /> 31 identifier="0000-0001-5449-3523" />
32 <person
33 givenName="Helge"
34 familyName="Hecht"
35 url="https://github.com/hechth"
36 identifier="0000-0001-6744-996X" />
34 <organization 37 <organization
35 url="https://www.recetox.muni.cz/" 38 url="https://www.recetox.muni.cz/"
36 email="GalaxyToolsDevelopmentandDeployment@space.muni.cz" 39 email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
37 name="RECETOX MUNI"/> 40 name="RECETOX MUNI"/>
38 </creator> 41 </creator>
39 </xml> 42 </xml>
40 43
41 <xml name="inputs"> 44 <xml name="remove_noise_params">
42 <inputs> 45 <param name="min_pres" type="float" value="0.5" label="min_pres"
43 <param name="files" type="data" format="mzdata,mzml,mzxml,netcdf" multiple="true" min="3" label="data" 46 help="The minimum proportion of presence in the time period for a series of signals grouped by m/z to be considered a peak." />
44 help="Mass spectrometry files for peak extraction." /> 47 <param name="min_run" type="float" value="12" label="min_run"
45 <yield /> 48 help="The minimum length of elution time for a series of signals grouped by m/z to be considered a peak." />
46 </inputs> 49 <param name="mz_tol" type="float" value="1e-05" label="mz_tol"
50 help="The m/z tolerance level for the grouping of data points. This value is expressed as the fraction of the m/z value. This value, multiplied by the m/z value, becomes the cutoff level. The recommended value is the machine's nominal accuracy level. Divide the ppm value by 1e6. For FTMS, 1e-5 is recommended." />
51 <param name="baseline_correct" type="float" value="0" label="baseline_correct"
52 help="After grouping the observations, the highest intensity in each group is found. If the highest is lower than this value, the entire group will be deleted. The default value is NA, in which case the program uses a percentile of the height of the noise groups. If given a value, the value will be used as the threshold, and baseline.correct.noise.percentile will be ignored." />
53 <param name="intensity_weighted" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="intensity_weighted"
54 help="Whether to weight the local density by signal intensities in initial peak detection." />
47 </xml> 55 </xml>
48 56
49 <xml name="history_db"> 57 <xml name="generate_feature_table_params">
50 <param name="known_table" type="data" format="parquet" label="known_table" 58 <param name="sd_cut_min" type="float" value="0.01" label="sd_cut_min"
51 help="A data table containing the known metabolite ions and previously found features. The table must contain these 18 columns: chemical_formula (optional), HMDB_ID (optional), KEGG_compound_ID (optional), neutral.mass (optional), ion.type (the ion form - optional), m.z (either theoretical or mean observed m/z value of previously found features), Number_profiles_processed (the total number of processed samples to build this database), Percent_found (the percentage of historically processed samples in which the feature appeared), mz_min (minimum observed m/z value), mz_max (maximum observed m/z value), RT_mean (mean observed retention time), RT_sd (standard deviation of observed retention time), RT_min (minimum observed retention time), RT_max (maximum observed retention time), int_mean.log. (mean observed log intensity), int_sd.log. (standard deviation of observed log intensity), int_min.log. (minimum observed log intensity), int_max.log. (maximum observed log intensity)." /> 59 help="The minimum standard deviation of a feature to be not eliminated." />
52 <section name="history_db" title="Known-Table settings"> 60 <param name="sd_cut_max" type="float" value="500" label="sd_cut_max"
53 <param name="match_tol_ppm" type="integer" optional="true" min="0" label="match_tol_ppm (optional)" 61 help="The maximum standard deviation of a feature to be not eliminated." />
54 help="The ppm tolerance to match identified features to known metabolites/features." /> 62 <conditional name="shape">
55 <param name="new_feature_min_count" type="integer" value="2" min="1" label="new_feature_min_count" 63 <param name="shape_model" type="select" display="radio" label="shape_model"
56 help="The minimum number of occurrences of a historically unseen (unknown) feature to add this feature into the database of known features." />
57 </section>
58 </xml>
59
60 <xml name="noise_filtering">
61 <section name="noise_filtering" title="Noise filtering and peak detection">
62 <yield />
63 <param name="min_pres" type="float" value="0.5"
64 label="min_pres"
65 help="The minimum proportion of presence in the time period for a series of signals grouped by m/z to be considered a peak." />
66 <param name="min_run" type="float" value="12"
67 label="min_run"
68 help="The minimum length of elution time for a series of signals grouped by m/z to be considered a peak." />
69 <param name="mz_tol" type="float" value="1e-05"
70 label="mz_tol"
71 help="The m/z tolerance level for the grouping of data points. This value is expressed as the fraction of the m/z value. This value, multiplied by the m/z value, becomes the cutoff level. The recommended value is the machine's nominal accuracy level. Divide the ppm value by 1e6. For FTMS, 1e-5 is recommended." />
72 <param name="baseline_correct" type="float" value="0" label="baseline_correct"
73 help="After grouping the observations, the highest intensity in each group is found. If the highest is lower than this value, the entire group will be deleted. The default value is NA, in which case the program uses a percentile of the height of the noise groups. If given a value, the value will be used as the threshold, and baseline.correct.noise.percentile will be ignored." />
74 <param name="baseline_correct_noise_percentile" type="float" value="0.05"
75 label="baseline_correct_noise_percentile"
76 help="The percentile of signal strength of those EIC that don't pass the run filter, to be used as the baseline threshold of signal strength." />
77 <param name="intensity_weighted" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE"
78 label="intensity_weighted"
79 help="Whether to weight the local density by signal intensities in initial peak detection." />
80 </section>
81 </xml>
82
83 <xml name="feature_detection">
84 <section name="feature_detection" title="Feature detection">
85 <param name="shape_model" type="select" display="radio"
86 label="shape_model"
87 help="The mathematical model for the shape of a peak. There are two choices - bi-Gaussian and Gaussian. When the peaks are asymmetric, the bi-Gaussian is better."> 64 help="The mathematical model for the shape of a peak. There are two choices - bi-Gaussian and Gaussian. When the peaks are asymmetric, the bi-Gaussian is better.">
88 <option value="Gaussian">Gaussian</option> 65 <option value="Gaussian">Gaussian</option>
89 <option value="bi-Gaussian" selected="true">bi-Gaussian</option> 66 <option value="bi-Gaussian" selected="true">bi-Gaussian</option>
90 </param> 67 </param>
91 <param name="BIC_factor" type="float" value="2.0" 68 <when value="bi-Gaussian">
92 label="BIC_factor" 69 <param name="sigma_ratio_lim_min" type="float" value="0.01" label="sigma_ratio_lim_min"
93 help="The factor that is multiplied on the number of parameters to modify the BIC criterion. If larger than 1, models with more peaks are penalized more." /> 70 help="The lower limit of the believed ratio range between the left-standard deviation and the right-standard deviation of the bi-Gaussian function used to fit the data." />
94 <param name="peak_estim_method" type="select" display="radio" 71 <param name="sigma_ratio_lim_max" type="float" value="100" label="sigma_ratio_lim_max"
95 label="peak_estim_method" 72 help="The upper limit of the believed ratio range between the left-standard deviation and the right-standard deviation of the bi-Gaussian function used to fit the data." />
96 help="The estimation method for the bi-Gaussian peak model. Two possible values: moment and EM."> 73 </when>
97 <option value="moment" selected="true">Moment</option> 74 </conditional>
98 <option value="EM">EM</option> 75 <param name="peak_estim_method" type="select" display="radio" label="peak_estim_method"
99 </param> 76 help="The estimation method for the bi-Gaussian peak model. Two possible values: moment and EM.">
100 <param name="min_bandwidth" type="float" optional="true" 77 <option value="moment" selected="true">Moment</option>
101 label="min_bandwidth (optional)" 78 <option value="EM">EM</option>
102 help="The minimum bandwidth to use in the kernel smoother." /> 79 </param>
103 <param name="max_bandwidth" type="float" optional="true" 80 <param name="moment_power" type="float" value="1" label="moment_power"
104 label="max_bandwidth (optional)" 81 help="The power parameter for data transformation when fitting the bi-Gaussian or Gaussian mixture model in an EIC." />
105 help="The maximum bandwidth to use in the kernel smoother." /> 82 <param name="component_eliminate" type="float" value="0.01" label="component_eliminate"
106 <param name="sd_cut_min" type="float" value="0.01" 83 help="In fitting mixture of bi-Gaussian (or Gaussian) model of an EIC, when a component accounts for a proportion of intensities less than this value, the component will be ignored." />
107 label="sd_cut_min" 84 <param name="BIC_factor" type="float" value="2.0" label="BIC_factor"
108 help="The minimum standard deviation of a feature to be not eliminated." /> 85 help="A factor influencing Bayesian information criterion (BIC) in estimation of RT peak shape. If the value is larger than 1, models with more peaks are penalized more." />
109 <param name="sd_cut_max" type="float" value="500"
110 label="sd_cut_max"
111 help="The maximum standard deviation of a feature to be not eliminated." />
112 <param name="sigma_ratio_lim_min" type="float" value="0.01"
113 label="sigma_ratio_lim_min"
114 help="The lower limit of the believed ratio range between the left-standard deviation and the right-standard deviation of the bi-Gaussian function used to fit the data." />
115 <param name="sigma_ratio_lim_max" type="float" value="100"
116 label="sigma_ratio_lim_max"
117 help="The upper limit of the believed ratio range between the left-standard deviation and the right-standard deviation of the bi-Gaussian function used to fit the data." />
118 <param name="component_eliminate" type="float" value="0.01"
119 label="component_eliminate"
120 help="In fitting mixture of bi-Gaussian (or Gaussian) model of an EIC, when a component accounts for a proportion of intensities less than this value, the component will be ignored." />
121 <param name="moment_power" type="float" value="1"
122 label="moment_power"
123 help="The power parameter for data transformation when fitting the bi-Gaussian or Gaussian mixture model in an EIC." />
124 </section>
125 </xml> 86 </xml>
126 87
127 <xml name="peak_alignment"> 88 <xml name="compute_clusters_params">
128 <section name="peak_alignment" title="Peak Alignment"> 89 <conditional name="tolerances_input_method">
129 <param name="align_chr_tol" type="float" optional="true" 90 <param name="input_method" type="select" display="radio" label="Tolerances input method"
130 label="align_chr_tol (optional)" 91 help="Tolerances can be entered directly or loaded from a file.">
131 help="The retention time tolerance level for peak alignment. The default is NA, which allows the program to search for the tolerance level based on the data." /> 92 <option value="direct" selected="true">direct</option>
132 <param name="align_mz_tol" type="float" optional="true" 93 <option value="file">file</option>
133 label="align_mz_tol (optional)" 94 </param>
134 help="The m/z tolerance level for peak alignment. The default is NA, which allows the program to search for the tolerance level based on the data. The tolerance is given in absolute numbers, not scaled, i.e. for 10ppm tolerance enter '1e-05'. This value, multiplied by the m/z value, becomes the cutoff level." /> 95 <when value="direct">
135 <param name="max_align_mz_diff" type="float" value="0.01" 96 <param name="mz_tol_relative" type="float" optional="true" label="mz_tol_relative"
136 label="max_align_mz_diff" 97 help="Relative m/z tolerance to use for grouping features." />
137 help="As the m/z tolerance is expressed in relative terms (ppm), it may not be suitable when the m/z range is wide. This parameter limits the tolerance in absolute terms. It mostly influences feature matching in higher m/z range." /> 98 <param name="rt_tol_relative" type="float" optional="true" label="rt_tol_relative"
138 </section> 99 help="Relative retention time tolerance to use for grouping features." />
100 </when>
101 <when value="file">
102 <param label="Input tolerances values" name="input_tolerances" type="data" format="parquet"
103 help="Table containing tolerance values." />
104 </when>
105 </conditional>
106 <param name="mz_tol_absolute" type="float" label="mz_tol_absolute" value="1e-05"
107 help="Absolute m/z tolerance to use for grouping features." />
108 <param name="mz_max_diff" type="float" label="mz_max_diff" value="0.01"
109 help="Maximum difference between feature m/z values to belong to the same cluster." />
139 </xml> 110 </xml>
140 111
141 <xml name="weak_signal_recovery"> 112 <xml name="recover_weaker_params">
142 <section name="weak_signal_recovery" title="Weak Signal Recovery">
143 <param name="recover_mz_range" type="float" optional="true"
144 label="recover_mz_range (optional)"
145 help="The m/z around the feature m/z to search for observations. The default value is NA, in which case 1.5 times the m/z tolerance in the aligned object will be used." />
146 <param name="recover_chr_range" type="float" optional="true"
147 label="recover_chr_range (optional)"
148 help="The retention time around the feature retention time to search for observations. The default value is NA, in which case 0.5 times the retention time tolerance in the aligned object will be used." />
149 <param name="use_observed_range" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE"
150 label="use_observed_range"
151 help="If the value is true, the actual range of the observed locations of the feature in all the spectra will be used." />
152 <param name="recover_min_count" type="integer" value="3"
153 label="recover_min_count"
154 help="The minimum number of raw data points to be considered as a true feature." />
155 </section>
156 </xml>
157 <xml name="multibatch_processing">
158 <section name="multibatch_processing" title="Multibatch processing">
159 <param name="min_within_batch_prop_detect" type="float" min="0" max="1" value="0.1"
160 label="minimum_batchwise_prop_detect"
161 help="The minimum detection frequency (relative) of a feature for it to be included in the final feature table." />
162 <param name="min_batch_prop" type="float" min="0" max="1" value="0.5"
163 label="minimum_batch_prop"
164 help="The minimum proportion of batches that must have a given feature. The features that are less abundant than the value won't be reported." />
165 <param name="batch_align_mz_tol" type="float" min="0" value="0.00001"
166 label="batch_align_mz_tol"
167 help="The m/z tolerance level for peak alignment within batch." />
168 <param name="batch_align_chr_tol" type="float" min="0" value="50.0"
169 label="batch_align_chr_tol"
170 help="The retention time tolerance level for peak alignment within batch." />
171 </section>
172 </xml>
173
174 <xml name="mz_tol_macro">
175 <param name="mz_tol" type="float" value="1e-05" label="mz_tol" 113 <param name="mz_tol" type="float" value="1e-05" label="mz_tol"
176 help="The m/z tolerance level for the grouping of data points. This value is expressed as the 114 help="The m/z tolerance level for the grouping of data points. This value is expressed as the
177 fraction of the m/z value. This value, multiplied by the m/z value, becomes the cutoff level. 115 fraction of the m/z value. This value, multiplied by the m/z value, becomes the cutoff level.
178 The recommended value is the machine's nominal accuracy level. Divide the ppm value by 1e6. 116 The recommended value is the machine's nominal accuracy level. Divide the ppm value by 1e6.
179 For FTMS, 1e-5 is recommended." /> 117 For FTMS, 1e-5 is recommended." />
118 <param name="recover_mz_range" type="float" optional="true" label="recover_mz_range"
119 help="The m/z around the feature m/z to search for observations. The default value is NA, in which
120 case 1.5 times the m/z tolerance in the aligned object will be used." />
121 <param name="recover_rt_range" type="float" optional="true" label="recover_rt_range"
122 help="The retention time around the feature retention time to search for observations.
123 The default value is NA, in which case 0.5 times the retention time tolerance in the aligned
124 object will be used." />
125 <param name="use_observed_range" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE"
126 label="use_observed_range" help="If the value is true, the actual range of the observed locations of
127 the feature in all the spectra will be used." />
128 <param name="recover_min_count" type="integer" value="3" label="recover_min_count"
129 help="The minimum number of raw data points to be considered as a true feature." />
130 <param name="intensity_weighted" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE"
131 label="intensity_weighted" help="Whether to weight the local density by signal intensities in initial peak detection." />
180 </xml> 132 </xml>
181 133
182 <xml name="output_format"> 134 <xml name="bandwidth_params">
183 <section name="output_format" title="Output Format"> 135 <param name="bandwidth" type="float" value="0.5" label="bandwidth"
184 <param name="out_format" type="boolean" checked="false" truevalue="recetox" falsevalue="original" label="Use custom RECETOX output format?" /> 136 help="A value between zero and one. Multiplying this value by the length of the signal along
185 </section> 137 the time axis helps determine the bandwidth in the kernel smoother used for peak identification." />
186 </xml> 138 <param name="min_bandwidth" type="float" optional="true" label="min_bandwidth"
187 139 help="The minimum bandwidth to use in the kernel smoother." />
188 <xml name="unsupervised_outputs"> 140 <param name="max_bandwidth" type="float" optional="true" label="max_bandwidth"
189 <data name="recovered_feature_sample_table" format="parquet" label="${tool.name} recovered_feature_sample_table on ${on_string}" /> 141 help="The maximum bandwidth to use in the kernel smoother." />
190 <data name="aligned_feature_sample_table" format="parquet" label="${tool.name} aligned_feature_sample_table on ${on_string}" hidden="true" />
191 <yield />
192 </xml> 142 </xml>
193 143
194 <xml name="citations"> 144 <xml name="citations">
195 <citations> 145 <citations>
196 <citation type="doi">10.1093/bioinformatics/btp291</citation> 146 <citation type="doi">10.1093/bioinformatics/btp291</citation>
197 <citation type="doi">10.1186/1471-2105-11-559</citation> 147 <citation type="doi">10.1186/1471-2105-11-559</citation>
198 <citation type="doi">10.1021/pr301053d</citation> 148 <citation type="doi">10.1021/pr301053d</citation>
199 <citation type="doi">10.1093/bioinformatics/btu430</citation> 149 <citation type="doi">10.1093/bioinformatics/btu430</citation>
150 <citation type="doi">10.1038/s41598-020-70850-0</citation>
200 <yield /> 151 <yield />
201 </citations> 152 </citations>
202 </xml> 153 </xml>
203
204 <token name="@HELP_hybrid@">
205 <![CDATA[
206 This is the Hybrid version of apLCMS, which incorporates the knowledge of known metabolites and historically
207 detected features on the same machinery to help detect and quantify lower-intensity peaks.
208
209 CAUTION: To use such knowledge, especially historical data, you must keep using (1) the same chromatography
210 system (otherwise the retention time will not match), and (2) the same type of samples with similar extraction
211 technique, such as human serum.
212
213 @GENERAL_HELP@
214 ]]>
215 </token>
216
217 <token name="@HELP_unsupervised@">
218 <![CDATA[
219 This is the Unsupervised version of apLCMS, which does not rely on any existing knowledge about metabolites or
220 any historically detected features. For such functionality please use the Hybrid version of apLCMS.
221
222 @GENERAL_HELP@
223 ]]>
224 </token>
225
226 <token name="@HELP_two-step-hybrid@">
227 <![CDATA[
228 This is the **Two-Step Hybrid** version of **apLCMS**. This tool improves upon the Hybrid version by accounting for the batch
229 effects in multi-batch experiments. As in the Hybrid version, this tool incorporates the knowledge of known metabolites and
230 historically detected features on the same machinery to help detect and quantify lower-intensity peaks.
231
232 **CAUTION**: To use such knowledge, especially historical data, you must keep using (1) the same chromatography
233 system (otherwise the retention time will not match), and (2) the same type of samples with similar extraction
234 technique, such as human serum.
235
236 @GENERAL_HELP@
237 ]]>
238 </token>
239
240 <token name="@GENERAL_HELP@">
241 apLCMS is software that generates a feature table from a batch of LC/MS spectra. The m/z and retention time
242 tolerance levels are estimated from the data. A run-filter is used to detect peaks and remove noise.
243 Non-parametric statistical methods are used to fine-tune peak selection and grouping. After retention time
244 correction, a feature table is generated by aligning peaks across spectra. For further information on apLCMS
245 please refer to https://mypage.cuhk.edu.cn/academics/yutianwei/apLCMS/.
246 </token>
247 </macros> 154 </macros>