Mercurial > repos > galaxyp > cardinal_single_ion_segmentation
comparison spatial_DGMM.xml @ 0:4cb6c83d3777 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit badc51fcd74ba0c14cd1ae64d5f524291fa11441"
author | galaxyp |
---|---|
date | Tue, 22 Feb 2022 20:51:09 +0000 |
parents | |
children | db423b7bce78 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4cb6c83d3777 |
---|---|
1 <tool id="cardinal_single_ion_segmentation" name="MSI single ion segmentation" version="@VERSION@.0"> | |
2 <description>mass spectrometry imaging spatial DGMM</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements"> | |
7 <requirement type="package" version="2.3">r-gridextra</requirement> | |
8 </expand> | |
9 <command detect_errors="exit_code"> | |
10 <![CDATA[ | |
11 | |
12 @INPUT_LINKING@ | |
13 cat '${MSI_spatial_DGMM}' && | |
14 Rscript '${MSI_spatial_DGMM}' | |
15 | |
16 ]]> | |
17 </command> | |
18 <configfiles> | |
19 <configfile name="MSI_spatial_DGMM"><![CDATA[ | |
20 | |
21 ################################# load libraries and read file ################# | |
22 | |
23 library(Cardinal) | |
24 library(gridExtra) | |
25 | |
26 @READING_MSIDATA_FULLY_COMPATIBLE@ | |
27 | |
28 #if str($sample_groups.group) == "multiple_groups": | |
29 ## read and extract x,y,annotation information | |
30 input_tabular <- read.delim("$sample_groups.annotation_file", header = $sample_groups.tabular_header, stringsAsFactors = FALSE) | |
31 annotation_input <- input_tabular[,c($sample_groups.column_x, $sample_groups.column_y, $sample_groups.column_names)] | |
32 annotation_name <- colnames(annotation_input)[3] ##extract header for annotations to later export tabular with same name | |
33 colnames(annotation_input) <- c("x", "y", "annotation") ## rename annotations header to default name "annotation" | |
34 | |
35 ## merge with coordinate information of msidata | |
36 msidata_coordinates <- data.frame(coord(msidata)\$x, coord(msidata)\$y, c(1:ncol(msidata))) ## third column records the original pixel order | |
37 colnames(msidata_coordinates) <- c("x", "y", "pixel_index") | |
38 merged_annotation <- merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE) ## all.x keeps pixels that have no row in the annotation file | |
39 merged_annotation[is.na(merged_annotation)] <- "NA" ## missing values become the literal string "NA" | |
40 merged_annotation <- merged_annotation[order(merged_annotation\$pixel_index),] ## sort back to the original pixel order via pixel_index | |
41 msidata\$annotation <- as.character(merged_annotation[,4]) ## fourth column is the annotation | |
42 #end if | |
43 | |
44 | |
45 @DATA_PROPERTIES_INRAM@ | |
46 | |
47 | |
48 ## remove duplicated coordinates | |
49 msidata <- msidata[,!duplicated(coord(msidata))] | |
50 | |
51 | |
52 ######################################## PDF ################################### | |
53 ################################################################################ | |
54 ################################################################################ | |
55 | |
56 | |
57 pdf("single_ion_segmentation.pdf", fonts = "Times", pointsize = 12) | |
58 plot(0,type='n',axes=FALSE,ann=FALSE) | |
59 | |
60 title(main=paste0("Single ion segmentation for file: \n\n", "$infile.display_name")) | |
61 | |
62 grid.table(property_df, rows= NULL) | |
63 | |
64 if (npeaks > 0) | |
65 { | |
66 | |
67 ## set seed to make analysis reproducible | |
68 set.seed($setseed) | |
69 | |
70 | |
71 ## single ion segmentation | |
72 dgmm <- spatialDGMM(msidata, | |
73 r = c($r), | |
74 k = c($k), | |
75 #if str($sample_groups.group) == 'single_group': | |
76 groups = as.factor(rep("$infile.display_name", ncol(msidata))), | |
77 #else | |
78 groups = msidata\$annotation, | |
79 #end if | |
80 method = "$method", | |
81 dist = "$dist", | |
82 annealing = $annealing, | |
83 init = "$init", | |
84 p0 = $p0, | |
85 iter.max = $iter_max, | |
86 tol = $tol) | |
87 | |
88 ## Summary results table | |
89 dgmm_summary <- as.data.frame(summary(dgmm)) | |
90 colnames(dgmm_summary) <- c('r', 'k', 'Feature', 'Classes/group') ## replace the summary headers with fixed names | |
91 dgmm_summary\$'m/z' <- mz(msidata) ## NOTE(review): assumes summary rows are in the same order and number as mz(msidata) - confirm | |
92 feature_n <- dgmm_summary\$Feature ## keep a copy before the column is dropped | |
93 dgmm_summary\$Feature <- NULL ## drop Feature ... | |
94 dgmm_summary\$Feature <- feature_n ## ... and re-add it so that it becomes the last column | |
95 write.table(dgmm_summary, file="$dgmm_summary", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
96 | |
97 ## Results images | |
98 for (dgmm_repeat in seq_len(nrow(dgmm_summary))){ ## one class image per summary row; seq_len() yields no iterations for an empty table | |
99 print(image(dgmm, values="class", model=dgmm_repeat))} | |
100 dev.off() ## closes pdf file | |
101 | |
102 ## optional outputs | |
103 pixel_names <- paste0("xy_", coord(dgmm)\$x, "_", coord(dgmm)\$y) | |
104 | |
105 #if $output_probability: | |
106 dir.create("DGMM_probability") ## directory read by the probability_output collection | |
107 for (dgmm_repeat in seq_len(nrow(dgmm_summary))){ ## one tabular file per summary row; seq_len() is safe for an empty table | |
108 name_repeat <- file.path("DGMM_probability", paste0("probability_r", dgmm_summary\$r[dgmm_repeat], "_k", dgmm_summary\$k[dgmm_repeat], "_mz", dgmm_summary\$`m/z`[dgmm_repeat], ".tabular")) | |
109 prob_df <- data.frame(coord(dgmm)\$x, coord(dgmm)\$y, pixel_names, resultData(dgmm, dgmm_repeat, "class"), resultData(dgmm, dgmm_repeat, "probability")) | |
110 colnames(prob_df)[1:4] <- c("x", "y", "pixel_names", "class") ## columns after the fourth come from the probability result and keep their names | |
111 write.table(prob_df, file=name_repeat, quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
112 } | |
113 #end if | |
114 | |
115 #if $output_estimates: | |
116 dir.create("DGMM_estimates") ## directory read by the estimates_output collection | |
117 for (dgmm_repeat in seq_len(nrow(dgmm_summary))){ ## one tabular file per summary row; seq_len() is safe for an empty table | |
118 name_repeat <- file.path("DGMM_estimates", paste0("estimates_r", dgmm_summary\$r[dgmm_repeat], "_k", dgmm_summary\$k[dgmm_repeat], "_mz", dgmm_summary\$`m/z`[dgmm_repeat], ".tabular")) | |
119 est_df <- resultData(dgmm, dgmm_repeat, "estimates") ## estimates stored for this model | |
120 write.table(est_df, file=name_repeat, quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
121 } | |
122 #end if | |
123 | |
124 #if $output_plots: | |
125 dir.create("DGMM_plots") ## directory read by the plots_output collection | |
126 for (dgmm_repeat in seq_len(nrow(dgmm_summary))){ ## one png per summary row; seq_len() is safe for an empty table | |
127 name_repeat <- file.path("DGMM_plots", paste0("plot_r", dgmm_summary\$r[dgmm_repeat], "_k", dgmm_summary\$k[dgmm_repeat], "_mz", dgmm_summary\$`m/z`[dgmm_repeat], ".png")) | |
128 png(filename=name_repeat) ## full argument name instead of relying on partial matching | |
129 print(plot(dgmm, model=dgmm_repeat, lwd=2)) | |
130 dev.off() ## close the png device before the next iteration | |
131 } | |
132 #end if | |
133 | |
134 ## optional output as .RData | |
135 #if $output_rdata: | |
136 ## save as (.RData) | |
137 save(dgmm, file="$dgmm_rdata") | |
138 #end if | |
139 | |
140 }else{ | |
141 print("Inputfile has no intensities > 0") | |
142 } | |
143 | |
144 ]]></configfile> | |
145 </configfiles> | |
146 <inputs> | |
147 <expand macro="reading_msidata"/> | |
148 <conditional name="sample_groups"> | |
149 <param name="group" type="select" label="Dataset groups" help="Pixels from different groups will be segmented separately. For the validity of | |
150 downstream statistical analysis, it is important that each distinct observational unit (e.g., tissue sample) is assigned to a unique group"> | |
151 <option value="single_group" selected="True">Dataset is a single group</option> | |
152 <option value="multiple_groups">Dataset contains multiple groups</option> | |
153 </param> | |
154 <when value="single_group"/> | |
155 <when value="multiple_groups"> | |
156 <expand macro="reading_pixel_annotations"/> | |
157 </when> | |
158 </conditional> | |
159 <param name="r" type="text" value="2" | |
160 label="r" help="The spatial neighborhood radius of nearby pixels to consider. Only a single value is allowed"> | |
161 <expand macro="sanitizer_multiple_digits"/> | |
162 </param> | |
163 <param name="k" type="text" value="5" | |
164 label="k" help="The maximum number of segments (clusters). The final number of segments may differ. Only a single value is allowed."> | |
165 <expand macro="sanitizer_multiple_digits"/> | |
166 </param> | |
167 <param name="method" type="select" display="radio" | |
168 label="weights method" help="The method to use to calculate the spatial smoothing weights. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights"> | |
169 <option value="gaussian" selected="True">gaussian</option> | |
170 <option value="adaptive">adaptive</option> | |
171 </param> | |
172 <param name="dist" type="select" display="radio" | |
173 label="distance metric" help="The type of distance metric to use when calculating neighboring pixels based on r. The options are ‘radial’, ‘manhattan’, ‘minkowski’, and ‘chebyshev’ (the | |
174 default)."> | |
175 <option value="chebyshev" selected="True">chebyshev</option> | |
176 <option value="manhattan">manhattan</option> | |
177 <option value="radial">radial</option> | |
178 <option value="minkowski">minkowski</option> | |
179 </param> | |
180 <param name="annealing" type="boolean" label="annealing" help="Should simulated annealing be used during the optimization process to improve parameter estimates?" truevalue="TRUE" falsevalue="FALSE" /> | |
181 <param name="init" type="select" display="radio" | |
182 label="init" help="Should the parameter estimates be initialized using k-means (’kmeans’) or Gaussian mixture model (’gmm’)?"> | |
183 <option value="kmeans" selected="True">kmeans</option> | |
184 <option value="gmm">gmm</option> | |
185 </param> | |
186 <param name="p0" type="float" value="0.05" label="p0" help="A regularization parameter applied to estimated posterior class probabilities to avoid singularities. Must be positive for successful gradient descent optimization. Changing this value (within reason) should have only minimal impact on values of parameter estimates, but may greatly affect the algorithm’s speed and stability." /> | |
187 <param name="iter_max" type="integer" value="100" label="iter.max" help="The maximum number of EM-algorithm iterations." /> | |
188 <param name="tol" type="float" value="0.05" label="tolerance" help="The tolerance convergence criterion for the EM-algorithm. Corresponds to the | |
189 change in log-likelihood."/> | |
190 <param name="setseed" type="integer" value="1" label="set seed" help="Use same value to reproduce previous results"/> | |
191 <param name="output_estimates" type="boolean" label="Generate estimates results"/> | |
192 <param name="output_probability" type="boolean" label="Generate probability and class results"/> | |
193 <param name="output_plots" type="boolean" label="Generate plots results"/> | |
194 <param name="output_rdata" type="boolean" label="Results as .RData output"/> | |
195 </inputs> | |
196 <outputs> | |
197 <data format="tabular" name="dgmm_summary" label="${tool.name} on ${on_string}: summary"/> | |
198 <data format="pdf" name="file_info" from_work_dir="single_ion_segmentation.pdf" label = "${tool.name} on ${on_string}: file_info"/> | |
199 <data format="rdata" name="dgmm_rdata" label="${tool.name} on ${on_string}: dgmm.RData"> | |
200 <filter>output_rdata</filter> | |
201 </data> | |
202 <collection name="estimates_output" type="list" label="${tool.name} logs: ${on_string}: estimates"> | |
203 <filter>output_estimates</filter> | |
204 <discover_datasets pattern="__designation_and_ext__" directory="DGMM_estimates" format="tabular"/> | |
205 </collection> | |
206 <collection name="probability_output" type="list" label="${tool.name} logs: ${on_string}: probability"> | |
207 <filter>output_probability</filter> | |
208 <discover_datasets pattern="__designation_and_ext__" directory="DGMM_probability" format="tabular"/> | |
209 </collection> | |
210 <collection name="plots_output" type="list" label="${tool.name} logs: ${on_string}: plots"> | |
211 <filter>output_plots</filter> | |
212 <discover_datasets pattern="__designation_and_ext__" directory="DGMM_plots" format="png"/> <!-- the script writes only .png files into DGMM_plots --> | |
213 </collection> | |
214 </outputs> | |
215 <tests> | |
216 <test> | |
217 <param name="infile" value="" ftype="imzml"> | |
218 <composite_data value="spatial_DGMM_input.imzML"/> | |
219 <composite_data value="spatial_DGMM_input.ibd"/> | |
220 </param> | |
221 <param name="r" value="1"/> | |
222 <param name="k" value="6"/> | |
223 <param name="method" value="adaptive"/> | |
224 <param name="dist" value="radial"/> | |
225 <param name="annealing" value="TRUE"/> | |
226 <param name="output_estimates" value="True"/> | |
227 <param name="output_probability" value="True"/> | |
228 <output name="file_info" file="dgmm_test1.pdf" compare="sim_size"/> | |
229 <output name="dgmm_summary" file="dgmm_summary1.tabular"/> | |
230 <output_collection name="estimates_output" type="list" count="10"> | |
231 <element name="estimates_r1_k6_mz1135.93347167969" file="estimates_r1_k6_mz1135.93347167969.tabular"/> | |
232 </output_collection> | |
233 <output_collection name="probability_output" type="list" count="10"> | |
234 <element name="probability_r1_k6_mz1023.70806884766" file="probability_r1_k6_mz1023.70806884766.tabular"/> | |
235 </output_collection> | |
236 </test> | |
237 <test> | |
238 <param name="infile" value="" ftype="imzml"> | |
239 <composite_data value="spatial_DGMM_input.imzML"/> | |
240 <composite_data value="spatial_DGMM_input.ibd"/> | |
241 </param> | |
242 <conditional name="sample_groups"> | |
243 <param name="group" value="multiple_groups"/> | |
244 <param name="annotation_file" value="DGMM_annotations.tabular"/> | |
245 <param name="column_x" value="1"/> | |
246 <param name="column_y" value="2"/> | |
247 <param name="column_names" value="3"/> | |
248 <param name="tabular_header" value="True"/> | |
249 </conditional> | |
250 <param name="r" value="2"/> | |
251 <param name="k" value="10"/> | |
252 <param name="annealing" value="TRUE"/> | |
253 <param name="output_estimates" value="True"/> | |
254 <param name="output_probability" value="True"/> | |
255 <param name="output_plots" value="True"/> | |
256 <param name="output_rdata" value="True"/> | |
257 <output name="file_info" file="dgmm_test2.pdf" compare="sim_size"/> | |
258 <output name="dgmm_summary" file="dgmm_summary2.tabular"/> | |
259 <output name="dgmm_rdata" file="dgmm_test2.RData" compare="sim_size"/> | |
260 <output_collection name="estimates_output" type="list" count="10"> | |
261 <element name="estimates_r2_k10_mz1200.46533203125" file="estimates_r2_k10_mz1200.46533203125.tabular"/> | |
262 </output_collection> | |
263 <output_collection name="probability_output" type="list" count="10"> | |
264 <element name="probability_r2_k10_mz1135.93347167969" file="probability_r2_k10_mz1135.93347167969.tabular"/> | |
265 </output_collection> | |
266 </test> | |
267 </tests> | |
268 <help> | |
269 <![CDATA[ | |
270 | |
271 @CARDINAL_DESCRIPTION@ | |
272 | |
273 ----- | |
274 | |
275 This tool fits spatially-aware Dirichlet Gaussian mixture models (DGMM) to each feature and each run in a mass spectrometry imaging | |
276 experiment. Each image is segmented and the means and variances of all Gaussian components are estimated. A linear filter with a spatial kernel is applied to the component probabilities to achieve | |
277 spatial smoothing. Simulated annealing is used in the EM-algorithm to avoid local optima and achieve more accurate parameter estimates. | |
278 | |
279 @MSIDATA_INPUT_DESCRIPTION@ | |
280 - NA intensities are not allowed | |
281 - duplicated coordinates will be removed | |
282 - It is highly recommended to use a dataset that is reduced for the number of m/z features e.g. pre-processed, binned, filtered for m/z of interest in order to keep computational times reasonable. In addition, it is beneficial to run the tool first without generating all possible results data and upon inspection of the summary of the results decide on the best tool parameters and m/z features (which can be filtered in the MSI filtering tool). | |
283 | |
284 @SPECTRA_TABULAR_INPUT_DESCRIPTION@ | |
285 | |
286 **Tips** | |
287 | |
288 - The input dataset should contain as few m/z features as possible to keep computational times reasonable. In addition, it is beneficial to run the tool first without generating all possible results data and upon inspection of the summary of the results decide on the best tool parameters and m/z features (which can be filtered in the MSI filtering tool). | |
289 - Pixels from distinct observational units (e.g. sample, patient) should be assigned to a unique group via the annotation file and segmented separately for the validity of downstream statistical analysis. | |
290 | |
291 **Output** | |
292 | |
293 - Pdf with file info and an image of the clusters for each m/z feature | |
294 - Tabular file summarizing spatial DGMM performance for each feature | |
295 - (optional) Tabular files for each spatial DGMM run and feature with | |
296 | |
297 - probabilities (The probability of class membership for each Gaussian component) and classes (The predicted Gaussian component) | |
298 - estimates (A list giving the parameter estimates for the means and variances for each Gaussian component) | |
299 - (optional) Visualization of features density plots | |
300 - (optional) .RData file which contains the segmentation results and can be used for further exploration in R using the Cardinal package | |
301 | |
302 ]]> | |
303 </help> | |
304 <citations> | |
305 <citation type="doi">10.1093/bioinformatics/btv146</citation> | |
306 <citation type="doi">10.1093/bioinformatics/btz345</citation> | |
307 </citations> | |
308 | |
309 </tool> |