Mercurial > repos > galaxyp > cardinal_classification
comparison classification.xml @ 0:2fdbbb1be2b0 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 0825a4ccd3ebf4ca8a298326d14f3e7b25ae8415
author | galaxyp |
---|---|
date | Mon, 01 Oct 2018 01:06:08 -0400 |
parents | |
children | 6a03b201bc12 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2fdbbb1be2b0 |
---|---|
1 <tool id="cardinal_classification" name="MSI classification" version="@VERSION@.0"> | |
2 <description>spatial classification of mass spectrometry imaging data</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements"> | |
7 <requirement type="package" version="2.2.1">r-gridextra</requirement> | |
8 <requirement type="package" version="0.20-35">r-lattice</requirement> | |
9 <requirement type="package" version="2.2.1">r-ggplot2</requirement> | |
10 </expand> | |
11 <command detect_errors="exit_code"> | |
12 <![CDATA[ | |
13 ## after the input linking token below, echo the generated R script (configfile MSI_segmentation) to stdout, then run it with Rscript | |
14 @INPUT_LINKING@ | |
15 cat '${MSI_segmentation}' && | |
16 Rscript '${MSI_segmentation}' | |
17 | |
18 ]]> | |
19 </command> | |
20 <configfiles> | |
21 <configfile name="MSI_segmentation"><![CDATA[ | |
22 | |
23 | |
24 ################################# load libraries and read file ######################### | |
25 | |
26 library(Cardinal) | |
27 library(gridExtra) | |
28 library(lattice) | |
29 library(ggplot2) | |
30 | |
31 @READING_MSIDATA@ | |
32 | |
33 ## create full matrix to make processed imzML files compatible with classification | |
34 iData(msidata) <- iData(msidata)[] | |
35 | |
36 @DATA_PROPERTIES@ | |
37 | |
38 ######################################## PDF ################################### | |
39 ################################################################################ | |
40 ################################################################################ | |
41 | |
42 Title = "Prediction" | |
43 | |
44 #if str( $type_cond.type_method) == "training": | |
45 | |
46 Title = "$type_cond.method_cond.class_method" | |
47 #end if | |
48 | |
49 pdf("classificationpdf.pdf", fonts = "Times", pointsize = 12) | |
50 plot(0,type='n',axes=FALSE,ann=FALSE) | |
51 | |
52 | |
53 title(main=paste0(Title," for file: \n\n", "$infile.display_name")) | |
54 | |
55 | |
56 ##################### I) numbers and control plots ############################# | |
57 ############################################################################### | |
58 | |
59 ## table with values | |
60 grid.table(property_df, rows= NULL) | |
61 | |
62 | |
63 if (npeaks > 0 && sum(is.na(spectra(msidata)))==0){ | |
64 | |
65 opar <- par() | |
66 | |
67 ######################## II) Training ############################# | |
68 ############################################################################# | |
69 #if str( $type_cond.type_method) == "training": | |
70 print("training") | |
71 | |
72 | |
73 ## load y response (will be needed in every training scenario) | |
74 | |
75 y_tabular = read.delim("$type_cond.annotation_file", header = $type_cond.tabular_header, stringsAsFactors = FALSE) | |
76 | |
77 #if str($type_cond.column_fold) == "None": | |
78 y_input = y_tabular[,c($type_cond.column_x, $type_cond.column_y, $type_cond.column_response)] | |
79 #else | |
80 y_input = y_tabular[,c($type_cond.column_x, $type_cond.column_y, $type_cond.column_response, $type_cond.column_fold)] | |
81 #end if | |
82 | |
83 colnames(y_input)[1:2] = c("x", "y") | |
84 ## merge with coordinate information of msidata | |
85 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) | |
86 colnames(msidata_coordinates)[3] = "pixel_index" | |
87 merged_response = merge(msidata_coordinates, y_input, by=c("x", "y"), all.x=TRUE) | |
88 merged_response[is.na(merged_response)] = "NA" | |
89 merged_response = merged_response[order(merged_response\$pixel_index),] | |
90 y_vector = as.factor(merged_response[,4]) | |
91 | |
92 ## plot of y vector | |
93 | |
94 position_df = cbind(coord(msidata)[,1:2], y_vector) | |
95 y_plot = ggplot(position_df, aes(x=x, y=y, fill=y_vector))+ | |
96 geom_tile() + | |
97 coord_fixed()+ | |
98 ggtitle("Distribution of the response variable y")+ | |
99 theme_bw()+ | |
100 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
101 theme(legend.position="bottom",legend.direction="vertical")+ | |
102 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
103 coord_labels = aggregate(cbind(x,y)~y_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") | |
104 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$y_vector) | |
105 print(y_plot) | |
106 | |
107 | |
108 ## plot of folds | |
109 | |
110 #if str($type_cond.column_fold) != "None": | |
111 fold_vector = as.factor(merged_response[,5]) | |
112 | |
113 | |
114 position_df = cbind(coord(msidata)[,1:2], fold_vector) | |
115 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+ | |
116 geom_tile() + | |
117 coord_fixed()+ | |
118 ggtitle("Distribution of the fold variable")+ | |
119 theme_bw()+ | |
120 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
121 theme(legend.position="bottom",legend.direction="vertical")+ | |
122 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
123 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") | |
124 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector) | |
125 print(fold_plot) | |
126 | |
127 #end if | |
128 | |
129 ######################## PLS ############################# | |
130 #if str( $type_cond.method_cond.class_method) == "PLS": | |
131 print("PLS") | |
132 | |
133 ######################## PLS - CV ############################# | |
134 #if str( $type_cond.method_cond.analysis_cond.PLS_method) == "cvapply": | |
135 print("PLS cv") | |
136 | |
137 ## set variables for components and number of response groups | |
138 components = c($type_cond.method_cond.analysis_cond.plscv_comp) | |
139 number_groups = length(levels(y_vector)) | |
140 | |
141 ## PLS-cvApply: | |
142 msidata.cv.pls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "PLS", ncomp = components) | |
143 | |
144 ## create table with summary | |
145 count = 1 | |
146 summary_plscv = list() | |
147 accuracy_vector = numeric() | |
148 for (iteration in components){ | |
149 summary_iteration = summary(msidata.cv.pls)\$accuracy[[paste0("ncomp = ", iteration)]] | |
150 ## change class of numbers into numeric to round and calculate mean | |
151 summary_iteration2 = round(as.numeric(summary_iteration), digits=2) | |
152 summary_matrix = matrix(summary_iteration2, nrow=4, ncol=number_groups) | |
153 accuracy_vector[count] = mean(summary_matrix[1,]) ## vector with accuracies to find later maximum for plot | |
154 summary_iteration3 = cbind(rownames(summary_iteration), summary_matrix) ## include rownames in table | |
155 summary_iteration4 = t(summary_iteration3) | |
156 summary_iteration5 = cbind(c(paste0("ncomp = ", iteration), colnames(summary_iteration)), summary_iteration4) | |
157 summary_plscv[[count]] = summary_iteration5 | |
158 count = count+1} ## create list with summary table for each component | |
159 summary_plscv = do.call(rbind, summary_plscv) | |
160 summary_df = as.data.frame(summary_plscv) | |
161 colnames(summary_df) = NULL | |
162 | |
163 ## plots | |
164 ## plot to find ncomp with highest accuracy | |
165 plot(components, accuracy_vector, ylab = "mean accuracy",type="o", main="Mean accuracy of PLS classification") | |
166 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy | |
167 ## one image for each sample/fold, 4 images per page | |
168 minimumy = min(coord(msidata.cv.pls)[,2]) | |
169 maximumy = max(coord(msidata.cv.pls)[,2]) | |
170 image(msidata.cv.pls, model = list(ncomp = ncomp_max),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout = c(2, 2)) | |
171 | |
172 ## print table with summary in pdf | |
173 par(opar) | |
174 plot(0,type='n',axes=FALSE,ann=FALSE) | |
175 title(main="Summary for the different components\n", adj=0.5) | |
176 ## 20 rows fits in one page: | |
177 if (nrow(summary_df)<=20){ | |
178 grid.table(summary_df, rows= NULL) | |
179 }else{ | |
180 grid.table(summary_df[1:20,], rows= NULL) | |
181 mincount = 21 | |
182 maxcount = 40 | |
183 for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){ | |
184 plot(0,type='n',axes=FALSE,ann=FALSE) | |
185 if (maxcount <= nrow(summary_df)){ | |
186 grid.table(summary_df[mincount:maxcount,], rows= NULL) | |
187 mincount = mincount+20 | |
188 maxcount = maxcount+20 | |
189 }else{### stop last page with last sample otherwise NA in table | |
190 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} | |
191 } | |
192 } | |
193 | |
194 ## optional output as .RData | |
195 #if $output_rdata: | |
196 save(msidata.cv.pls, file="$classification_rdata") | |
197 #end if | |
198 | |
199 | |
200 ######################## PLS - analysis ########################### | |
201 #elif str( $type_cond.method_cond.analysis_cond.PLS_method) == "PLS_analysis": | |
202 print("PLS analysis") | |
203 | |
204 ## set variables for components and number of response groups | |
205 component = c($type_cond.method_cond.analysis_cond.pls_comp) | |
206 number_groups = length(levels(y_vector)) | |
207 | |
208 ### pls analysis and coefficients plot | |
209 msidata.pls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.analysis_cond.pls_scale) | |
210 plot(msidata.pls, main="PLS coefficients per m/z") | |
211 | |
212 ### summary table of PLS | |
213 summary_table = summary(msidata.pls)\$accuracy[[paste0("ncomp = ",component)]] | |
214 summary_table2 = round(as.numeric(summary_table), digits=2) | |
215 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups) | |
216 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table | |
217 summary_table4 = t(summary_table3) | |
218 summary_table5 = cbind(c(paste0("ncomp = ", component), colnames(summary_table)), summary_table4) | |
219 plot(0,type='n',axes=FALSE,ann=FALSE) | |
220 grid.table(summary_table5, rows= NULL) | |
221 | |
222 ### image of the best m/z | |
223 minimumy = min(coord(msidata)[,2]) | |
224 maximumy = max(coord(msidata)[,2]) | |
225 print(image(msidata, mz = topLabels(msidata.pls)[1,1], normalize.image = "linear", contrast.enhance = "histogram",ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), smooth.image="gaussian", main="best m/z heatmap")) | |
226 | |
227 ### m/z and pixel information output | |
228 pls_classes = data.frame(msidata.pls\$classes[[1]]) | |
229 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) | |
230 pixel_names = gsub(" = ", "y_", pixel_names) | |
231 x_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,2] | |
232 y_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,3] | |
233 | |
234 pls_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, pls_classes) | |
235 colnames(pls_classes2) = c("pixel names", "x", "y","predicted condition") | |
236 pls_toplabels = topLabels(msidata.pls, n=$type_cond.method_cond.analysis_cond.pls_toplabels) | |
237 write.table(pls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
238 write.table(pls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
239 | |
240 ## image with predicted classes | |
241 prediction_df = cbind(coord(msidata.pls)[,1:2], pls_classes) | |
242 colnames(prediction_df) = c("x", "y", "predicted_classes") | |
243 | |
244 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ | |
245 geom_tile() + | |
246 coord_fixed()+ | |
247 ggtitle("Predicted condition for each pixel")+ | |
248 theme_bw()+ | |
249 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
250 theme(legend.position="bottom",legend.direction="vertical")+ | |
251 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
252 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") | |
253 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) | |
254 print(prediction_plot) | |
255 | |
256 ### optional output as .RData | |
257 #if $output_rdata: | |
258 save(msidata.pls, file="$classification_rdata") | |
259 #end if | |
260 | |
261 #end if | |
262 | |
263 ######################## OPLS ############################# | |
264 #elif str( $type_cond.method_cond.class_method) == "OPLS": | |
265 print("OPLS") | |
266 | |
267 ######################## OPLS -CV ############################# | |
268 #if str( $type_cond.method_cond.opls_analysis_cond.opls_method) == "opls_cvapply": | |
269 print("OPLS cv") | |
270 | |
271 ## set variables for components and number of response groups | |
272 components = c($type_cond.method_cond.opls_analysis_cond.opls_cvcomp) | |
273 number_groups = length(levels(y_vector)) | |
274 | |
275 ## OPLS-cvApply: | |
276 msidata.cv.opls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "OPLS", ncomp = components, keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew_cv) | |
277 | |
278 ## create table with summary | |
279 count = 1 | |
280 summary_oplscv = list() | |
281 accuracy_vector = numeric() | |
282 for (iteration in components){ | |
283 | |
284 summary_iteration = summary(msidata.cv.opls)\$accuracy[[paste0("ncomp = ", iteration)]] | |
285 ## change class of numbers into numeric to round and calculate mean | |
286 summary_iteration2 = round(as.numeric(summary_iteration), digits=2) | |
287 summary_matrix = matrix(summary_iteration2, nrow=4, ncol=number_groups) | |
288 accuracy_vector[count] = mean(summary_matrix[1,]) ## vector with accuracies to find later maximum for plot | |
289 summary_iteration3 = cbind(rownames(summary_iteration), summary_matrix) ## include rownames in table | |
290 summary_iteration4 = t(summary_iteration3) | |
291 summary_iteration5 = cbind(c(paste0("ncomp = ", iteration), colnames(summary_iteration)), summary_iteration4) | |
292 summary_oplscv[[count]] = summary_iteration5 | |
293 count = count+1} ## create list with summary table for each component | |
294 summary_oplscv = do.call(rbind, summary_oplscv) | |
295 summary_df = as.data.frame(summary_oplscv) | |
296 colnames(summary_df) = NULL | |
297 | |
298 ## plots | |
299 ## plot to find ncomp with highest accuracy | |
300 plot(components, accuracy_vector, ylab = "mean accuracy", type="o", main="Mean accuracy of OPLS classification") | |
301 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy | |
302 ## one image for each sample/fold, 4 images per page | |
303 minimumy = min(coord(msidata.cv.opls)[,2]) | |
304 maximumy = max(coord(msidata.cv.opls)[,2]) | |
305 image(msidata.cv.opls, model = list(ncomp = ncomp_max),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout = c(2, 2)) | |
306 | |
307 ## print table with summary in pdf | |
308 par(opar) | |
309 plot(0,type='n',axes=FALSE,ann=FALSE) | |
310 title(main="Summary for the different components\n", adj=0.5) | |
311 ## 20 rows fits in one page: | |
312 if (nrow(summary_df)<=20){ | |
313 grid.table(summary_df, rows= NULL) | |
314 }else{ | |
315 grid.table(summary_df[1:20,], rows= NULL) | |
316 mincount = 21 | |
317 maxcount = 40 | |
318 for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){ | |
319 plot(0,type='n',axes=FALSE,ann=FALSE) | |
320 if (maxcount <= nrow(summary_df)){ | |
321 grid.table(summary_df[mincount:maxcount,], rows= NULL) | |
322 mincount = mincount+20 | |
323 maxcount = maxcount+20 | |
324 }else{### stop last page with last sample otherwise NA in table | |
325 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} | |
326 } | |
327 } | |
328 | |
329 ## optional output as .RData | |
330 #if $output_rdata: | |
331 save(msidata.cv.opls, file="$classification_rdata") | |
332 #end if | |
333 | |
334 | |
335 ######################## OPLS -analysis ########################### | |
336 #elif str( $type_cond.method_cond.opls_analysis_cond.opls_method) == "opls_analysis": | |
337 print("OPLS analysis") | |
338 | |
339 ## set variables for components and number of response groups | |
340 component = c($type_cond.method_cond.opls_analysis_cond.opls_comp) | |
341 number_groups = length(levels(y_vector)) | |
342 | |
343 | |
344 ### OPLS analysis and coefficients plot: fit with OPLS() so the selected method is really used and keep.Xnew applies | |
345 msidata.opls <- OPLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.opls_analysis_cond.opls_scale, keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew) | |
346 plot(msidata.opls, main="OPLS coefficients per m/z") | |
347 | |
348 ### summary table of OPLS | |
349 summary_table = summary(msidata.opls)\$accuracy[[paste0("ncomp = ",component)]] | |
350 summary_table2 = round(as.numeric(summary_table), digits=2) | |
351 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups) | |
352 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table | |
353 summary_table4 = t(summary_table3) | |
354 summary_table5 = cbind(c(paste0("ncomp = ", component), colnames(summary_table)), summary_table4) | |
355 plot(0,type='n',axes=FALSE,ann=FALSE) | |
356 grid.table(summary_table5, rows= NULL) | |
357 | |
358 ### image of the best m/z | |
359 minimumy = min(coord(msidata)[,2]) | |
360 maximumy = max(coord(msidata)[,2]) | |
361 print(image(msidata, mz = topLabels(msidata.opls)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), main="best m/z heatmap")) | |
362 | |
363 ## m/z and pixel information output | |
364 opls_classes = data.frame(msidata.opls\$classes[[1]]) | |
365 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) | |
366 pixel_names = gsub(" = ", "y_", pixel_names) | |
367 x_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,2] | |
368 y_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,3] | |
369 opls_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, opls_classes) | |
370 colnames(opls_classes2) = c("pixel names", "x", "y","predicted condition") | |
371 | |
372 opls_toplabels = topLabels(msidata.opls, n=$type_cond.method_cond.opls_analysis_cond.opls_toplabels) | |
373 write.table(opls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
374 write.table(opls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
375 | |
376 ## image with predicted classes | |
377 prediction_df = cbind(coord(msidata.opls)[,1:2], opls_classes) | |
378 colnames(prediction_df) = c("x", "y", "predicted_classes") | |
379 | |
380 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ | |
381 geom_tile() + | |
382 coord_fixed()+ | |
383 ggtitle("Predicted condition for each pixel")+ | |
384 theme_bw()+ | |
385 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
386 theme(legend.position="bottom",legend.direction="vertical")+ | |
387 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
388 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") | |
389 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) | |
390 print(prediction_plot) | |
391 | |
392 ## optional output as .RData | |
393 #if $output_rdata: | |
394 save(msidata.opls, file="$classification_rdata") | |
395 #end if | |
396 #end if | |
397 | |
398 | |
399 ######################## SSC ############################# | |
400 #elif str( $type_cond.method_cond.class_method) == "spatialShrunkenCentroids": | |
401 print("SSC") | |
402 | |
403 ######################## SSC - CV ############################# | |
404 #if str( $type_cond.method_cond.ssc_analysis_cond.ssc_method) == "ssc_cvapply": | |
405 print("SSC cv") | |
406 | |
407 ## set variables for components and number of response groups | |
408 number_groups = length(levels(y_vector)) | |
409 | |
410 ## SSC-cvApply: | |
411 msidata.cv.ssc <- cvApply(msidata, .y = y_vector,.fold = fold_vector,.fun = "spatialShrunkenCentroids", r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") | |
412 | |
413 ## create table with summary | |
414 count = 1 | |
415 summary_ssccv = list() | |
416 accuracy_vector = numeric() | |
417 iteration_vector = character() | |
418 for (iteration in names(msidata.cv.ssc@resultData[[1]][,1])){ | |
419 | |
420 summary_iteration = summary(msidata.cv.ssc)\$accuracy[[iteration]] | |
421 ## change class of numbers into numeric to round and calculate mean | |
422 summary_iteration2 = round(as.numeric(summary_iteration), digits=2) | |
423 summary_matrix = matrix(summary_iteration2, nrow=4, ncol=number_groups) | |
424 accuracy_vector[count] = mean(summary_matrix[1,]) ## vector with accuracies to find later maximum for plot | |
425 summary_iteration3 = cbind(rownames(summary_iteration), summary_matrix) ## include rownames in table | |
426 summary_iteration4 = t(summary_iteration3) | |
427 summary_iteration5 = cbind(c(iteration, colnames(summary_iteration)), summary_iteration4) | |
428 summary_ssccv[[count]] = summary_iteration5 | |
429 iteration_vector[count] = unlist(strsplit(iteration, "[,]"))[3] | |
430 count = count+1} ## create list with summary table for each component | |
431 summary_ssccv = do.call(rbind, summary_ssccv) | |
432 summary_df = as.data.frame(summary_ssccv) | |
433 colnames(summary_df) = NULL | |
434 | |
435 ## plot to find parameters with highest accuracy | |
436 plot(c($type_cond.method_cond.ssc_s),accuracy_vector[!duplicated(iteration_vector)], type="o",ylab="Mean accuracy", xlab = "Shrinkage parameter (s)", main="Mean accuracy of SSC classification") | |
437 best_params = names(msidata.cv.ssc@resultData[[1]][,1])[which.max(accuracy_vector)] ## find parameters with max. accuracy | |
438 r_value = as.numeric(substring(unlist(strsplit(best_params, ","))[1], 4)) | |
439 s_value = as.numeric(substring(unlist(strsplit(best_params, ","))[3], 5)) ## remove space | |
440 minimumy = min(coord(msidata.cv.ssc)[,2]) | |
441 maximumy = max(coord(msidata.cv.ssc)[,2]) | |
442 image(msidata.cv.ssc, model = list( r = r_value, s = s_value ), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout=c(2,2)) | |
443 | |
444 ## print table with summary in pdf | |
445 par(opar) | |
446 plot(0,type='n',axes=FALSE,ann=FALSE) | |
447 title(main="Summary for the different parameters\n", adj=0.5) | |
448 ## 20 rows fits in one page: | |
449 if (nrow(summary_df)<=20){ | |
450 grid.table(summary_df, rows= NULL) | |
451 }else{ | |
452 grid.table(summary_df[1:20,], rows= NULL) | |
453 mincount = 21 | |
454 maxcount = 40 | |
455 for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){ | |
456 plot(0,type='n',axes=FALSE,ann=FALSE) | |
457 if (maxcount <= nrow(summary_df)){ | |
458 grid.table(summary_df[mincount:maxcount,], rows= NULL) | |
459 mincount = mincount+20 | |
460 maxcount = maxcount+20 | |
461 }else{### stop last page with last sample otherwise NA in table | |
462 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} | |
463 } | |
464 } | |
465 | |
466 ## optional output as .RData | |
467 #if $output_rdata: | |
468 save(msidata.cv.ssc, file="$classification_rdata") | |
469 #end if | |
470 | |
471 ######################## SSC -analysis ########################### | |
472 #elif str( $type_cond.method_cond.ssc_analysis_cond.ssc_method) == "ssc_analysis": | |
473 print("SSC analysis") | |
474 | |
475 ## set variables for components and number of response groups | |
476 number_groups = length(levels(y_vector)) | |
477 | |
478 ## SSC analysis and t-statistics plot. NOTE(review): fold_vector is only created when a fold column was selected, yet it is passed below as .fold - confirm the argument is needed here | |
479 msidata.ssc <- spatialShrunkenCentroids(msidata, y = y_vector, .fold = fold_vector, | |
480 r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") | |
481 plot(msidata.ssc, mode = "tstatistics", model = list("r" = c($type_cond.method_cond.ssc_r), "s" = c($type_cond.method_cond.ssc_s))) | |
482 | |
483 ### summary table SSC | |
484 ##############summary_table = summary(msidata.ssc) | |
485 summary_table = summary(msidata.ssc)\$accuracy[[names(msidata.ssc@resultData)]] | |
486 summary_table2 = round(as.numeric(summary_table), digits=2) | |
487 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups) | |
488 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table | |
489 summary_table4 = t(summary_table3) | |
490 summary_table5 = cbind(c(names(msidata.ssc@resultData),colnames(summary_table)), summary_table4) | |
491 plot(0,type='n',axes=FALSE,ann=FALSE) | |
492 grid.table(summary_table5, rows= NULL) | |
493 | |
494 ### image of the best m/z | |
495 minimumy = min(coord(msidata)[,2]) | |
496 maximumy = max(coord(msidata)[,2]) | |
497 print(image(msidata, mz = topLabels(msidata.ssc)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), main="best m/z heatmap")) | |
498 | |
499 ## m/z and pixel information output | |
500 ssc_classes = data.frame(msidata.ssc\$classes[[1]]) | |
501 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) | |
502 pixel_names = gsub(" = ", "y_", pixel_names) | |
503 x_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,2] | |
504 y_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,3] | |
505 ssc_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, ssc_classes) | |
506 colnames(ssc_classes2) = c("pixel names", "x", "y","predicted condition") | |
507 ssc_toplabels = topLabels(msidata.ssc, n=$type_cond.method_cond.ssc_analysis_cond.ssc_toplabels) | |
508 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
509 write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
510 | |
511 ## image with predicted classes | |
512 prediction_df = cbind(coord(msidata.ssc)[,1:2], ssc_classes) | |
513 colnames(prediction_df) = c("x", "y", "predicted_classes") | |
514 | |
515 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ | |
516 geom_tile() + | |
517 coord_fixed()+ | |
518 ggtitle("Predicted condition for each pixel")+ | |
519 theme_bw()+ | |
520 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
521 theme(legend.position="bottom",legend.direction="vertical")+ | |
522 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
523 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") | |
524 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) | |
525 print(prediction_plot) | |
526 | |
527 | |
528 ## optional output as .RData | |
529 #if $output_rdata: | |
530 save(msidata.ssc, file="$classification_rdata") | |
531 #end if | |
532 | |
533 #end if | |
534 #end if | |
535 | |
536 | |
537 | |
538 ######################## II) Prediction ############################# | |
539 ############################################################################# | |
540 | |
541 #elif str($type_cond.type_method) == "prediction": | |
542 print("prediction") | |
543 | |
544 training_data = loadRData("$type_cond.training_result") | |
545 | |
546 #if str($type_cond.new_y_values_cond.new_y_values) == "new_response": | |
547 print("new response") | |
548 | |
549 new_y_tabular = read.delim("$type_cond.new_y_values_cond.new_response_file", header = $type_cond.new_y_values_cond.new_tabular_header, stringsAsFactors = FALSE) | |
550 new_y_input = new_y_tabular[,c($type_cond.new_y_values_cond.column_new_x, $type_cond.new_y_values_cond.column_new_y, $type_cond.new_y_values_cond.column_new_response)] | |
551 colnames(new_y_input)[1:2] = c("x", "y") | |
552 ## merge with coordinate information of msidata | |
553 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) | |
554 colnames(msidata_coordinates)[3] = "pixel_index" | |
555 merged_response = merge(msidata_coordinates, new_y_input, by=c("x", "y"), all.x=TRUE) | |
556 merged_response[is.na(merged_response)] = "NA" | |
557 merged_response = merged_response[order(merged_response\$pixel_index),] | |
558 new_y_vector = as.factor(merged_response[,4]) | |
559 prediction = predict(training_data,msidata, newy = new_y_vector) | |
560 | |
561 #else | |
562 prediction = predict(training_data,msidata) | |
563 #end if | |
564 | |
565 ## m/z and pixel information output | |
566 predicted_classes = data.frame(prediction\$classes[[1]]) | |
567 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) | |
568 pixel_names = gsub(" = ", "y_", pixel_names) | |
569 x_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,2] | |
570 y_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,3] | |
571 predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes) | |
572 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition") | |
573 predicted_toplabels = topLabels(prediction, n=$type_cond.predicted_toplabels) | |
574 write.table(predicted_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
575 write.table(predicted_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
576 | |
577 ## image with predicted classes | |
578 | |
579 prediction_df = cbind(coord(prediction)[,1:2], predicted_classes) | |
580 colnames(prediction_df) = c("x", "y", "predicted_classes") | |
581 | |
582 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ | |
583 geom_tile() + | |
584 coord_fixed()+ | |
585 ggtitle("Predicted condition for each pixel")+ | |
586 theme_bw()+ | |
587 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
588 theme(legend.position="bottom",legend.direction="vertical")+ | |
589 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
590 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") | |
591 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) | |
592 print(prediction_plot) | |
593 | |
594 ## Summary table prediction | |
595 summary_table = summary(prediction)\$accuracy[[names(prediction@resultData)]] | |
596 summary_table2 = round(as.numeric(summary_table), digits=2) | |
597 summary_matrix = matrix(summary_table2, nrow=4, ncol=ncol(summary_table)) | |
598 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table | |
599 summary_table4 = t(summary_table3) | |
600 summary_table5 = cbind(c(names(prediction@resultData),colnames(summary_table)), summary_table4) | |
601 plot(0,type='n',axes=FALSE,ann=FALSE) | |
602 grid.table(summary_table5, rows= NULL) | |
603 | |
604 ## optional output as .RData | |
605 #if $output_rdata: | |
606 msidata = prediction | |
607 save(msidata, file="$classification_rdata") | |
608 #end if | |
609 | |
610 #end if | |
611 | |
612 dev.off() | |
613 | |
614 }else{ | |
615 print("Inputfile has no intensities > 0 or contains NA values") | |
616 dev.off() | |
617 } | |
618 | |
619 | |
620 ]]></configfile> | |
621 </configfiles> | |
622 <inputs> | |
623 <expand macro="reading_msidata"/> | |
624 <conditional name="type_cond"> | |
625 <param name="type_method" type="select" label="Analysis step to perform"> | |
626 <option value="training" selected="True">training</option> | |
627 <option value="prediction">prediction</option> | |
628 </param> | |
629 <when value="training"> | |
630 | |
631 <param name="annotation_file" type="data" format="tabular" label="Load tabular file with pixel coordinates and their classes" | |
632 help="Three or four columns: x values, y values, response values, optionally fold values"/> | |
633 <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/> | |
634 <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/> | |
635 <param name="column_response" data_ref="annotation_file" label="Column with response (condition) values" type="data_column" help="This is the condition (pixel group) which will be classified"/> | |
636 <param name="column_fold" data_ref="annotation_file" optional="True" label="Column with fold values - only necessary for cvapply" type="data_column" help="Each fold must contain pixels of all response groups and is used for cross validation"/> | |
637 <param name="tabular_header" type="boolean" label="Tabular files contain a header line" truevalue="TRUE" falsevalue="FALSE"/> | |
638 | |
639 <conditional name="method_cond"> | |
640 <param name="class_method" type="select" label="Select the method for classification"> | |
641 <option value="PLS" selected="True">PLS-DA</option> | |
642 <option value="OPLS">OPLS-DA</option> | |
643 <option value="spatialShrunkenCentroids">spatial shrunken centroids</option> | |
644 </param> | |
645 <when value="PLS"> | |
646 | |
647 <conditional name="analysis_cond"> | |
648 <param name="PLS_method" type="select" label="Crossvalidation or analysis"> | |
649 <option value="cvapply" selected="True">cvApply</option> | |
650 <option value="PLS_analysis">PLS-DA analysis</option> | |
651 </param> | |
652 <when value="cvapply"> | |
653 <param name="plscv_comp" type="text" value="1:2" | |
654 label="The number of PLS-DA components" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"> | |
655 <expand macro="sanitizer_multiple_digits"/> | |
656 </param> | |
657 </when> | |
658 <when value="PLS_analysis"> | |
659 <param name="pls_comp" type="integer" value="5" | |
660 label="The optimal number of PLS-DA components as indicated by cross-validations" help="Run cvApply first to obtain the optimal number of PLS-DA components"/> | |
661 <param name="pls_scale" type="boolean" label="Data scaling" truevalue="TRUE" falsevalue="FALSE"/> | |
662 <param name="pls_toplabels" type="integer" value="100" | |
663 label="Number of toplabels (m/z features) which should be written in tabular output"/> | |
664 </when> | |
665 </conditional> | |
666 </when> | |
667 | |
668 <when value="OPLS"> | |
669 | |
670 <conditional name="opls_analysis_cond"> | |
671 <param name="opls_method" type="select" label="Analysis step to perform"> | |
672 <option value="opls_cvapply" selected="True">cvApply</option> | |
673 <option value="opls_analysis">OPLS-DA analysis</option> | |
674 </param> | |
675 | |
676 <when value="opls_cvapply"> | |
677 <param name="opls_cvcomp" type="text" value="1:2" | |
678 label="The number of OPLS-DA components" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"> | |
679 <expand macro="sanitizer_multiple_digits"/> | |
680 </param> | |
681 <param name="xnew_cv" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/> | |
682 </when> | |
683 | |
684 <when value="opls_analysis"> | |
685 <param name="opls_comp" type="integer" value="5" | |
686 label="The optimal number of OPLS-DA components as indicated by cross-validations" help="Run cvApply first to obtain the optimal number of OPLS-DA components"/> | |
687 <param name="xnew" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/> | |
688 <param name="opls_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Data scaling"/> | |
689 <param name="opls_toplabels" type="integer" value="100" | |
690 label="Number of toplabels (m/z features) which should be written in tabular output"/> | |
691 </when> | |
692 </conditional> | |
693 </when> | |
694 | |
695 <when value="spatialShrunkenCentroids"> | |
696 <conditional name="ssc_analysis_cond"> | |
697 <param name="ssc_method" type="select" label="Analysis step to perform"> | |
698 <option value="ssc_cvapply" selected="True">cvApply</option> | |
699 <option value="ssc_analysis">spatial shrunken centroids analysis</option> | |
700 </param> | |
701 <when value="ssc_cvapply"/> | |
702 | |
703 <when value="ssc_analysis"> | |
704 <param name="ssc_toplabels" type="integer" value="100" | |
705 label="Number of toplabels (m/z features) which should be written in tabular output"/> | |
706 </when> | |
707 </conditional> | |
708 <param name="ssc_r" type="text" value="2" | |
709 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"> | |
710 <expand macro="sanitizer_multiple_digits"/> | |
711 </param> | |
712 <param name="ssc_s" type="text" value="2" | |
713 label="The sparsity thresholding parameter by which to shrink the t-statistics (s)" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"> | |
714 <expand macro="sanitizer_multiple_digits"/> | |
715 </param> | |
716 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights"> | |
717 <option value="gaussian">gaussian</option> | |
718 <option value="adaptive" selected="True">adaptive</option> | |
719 </param> | |
720 | |
721 </when> | |
722 </conditional> | |
723 </when> | |
724 | |
725 <when value="prediction"> | |
726 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/> | |
727 <param name="predicted_toplabels" type="integer" value="100" | |
728 label="Number of toplabels (m/z features) which should be written in tabular output"/> | |
729 <conditional name="new_y_values_cond"> | |
730 <param name="new_y_values" type="select" label="Should new response values be used"> | |
731 <option value="no_new_response" selected="True">old response should be used</option> | |
732 <option value="new_response">load new response from tabular file</option> | |
733 </param> | |
734 <when value="no_new_response"/> | |
735 <when value="new_response"> | |
736 <param name="new_response_file" type="data" format="tabular" label="Load tabular file with pixel coordinates and the new response"/> | |
737 <param name="column_new_x" data_ref="new_response_file" label="Column with x values" type="data_column"/> | |
738 <param name="column_new_y" data_ref="new_response_file" label="Column with y values" type="data_column"/> | |
739 <param name="column_new_response" data_ref="new_response_file" label="Column with new response values" type="data_column"/> | |
740 <param name="new_tabular_header" type="boolean" label="Tabular files contain a header line" truevalue="TRUE" falsevalue="FALSE"/> | |
741 </when> | |
742 </conditional> | |
743 </when> | |
744 </conditional> | |
745 <param name="output_rdata" type="boolean" label="Results as .RData output" help="Can be used to generate a classification prediction on new data"/> | |
746 </inputs> | |
747 <outputs> | |
748 <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label = "${tool.name} on ${on_string}"/> | |
749 <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features"/> | |
750 <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels"/> | |
751 <data format="rdata" name="classification_rdata" label="${tool.name} on ${on_string}: results.RData"> | |
752 <filter>output_rdata</filter> | |
753 </data> | |
754 </outputs> | |
755 <tests> | |
756 <test expect_num_outputs="3"> | |
757 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
758 <conditional name="type_cond"> | |
759 <param name="type_method" value="training"/> | |
760 <param name="annotation_file" value= "pixel_annotation_file1.tabular" ftype="tabular"/> | |
761 <param name="column_x" value="1"/> | |
762 <param name="column_y" value="2"/> | |
763 <param name="column_response" value="4"/> | |
764 <param name="column_fold" value="3"/> | |
765 <param name="tabular_header" value="False"/> | |
766 <conditional name="method_cond"> | |
767 <param name="class_method" value="PLS"/> | |
768 <conditional name="analysis_cond"> | |
769 <param name="PLS_method" value="cvapply"/> | |
770 <param name="plscv_comp" value="2:4"/> | |
771 </conditional> | |
772 </conditional> | |
773 </conditional> | |
774 <output name="mzfeatures" file="features_test1.tabular"/> | |
775 <output name="pixeloutput" file="pixels_test1.tabular"/> | |
776 <output name="classification_images" file="test1.pdf" compare="sim_size" delta="2000"/> | |
777 </test> | |
778 | |
779 <test expect_num_outputs="4"> | |
780 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
781 <conditional name="type_cond"> | |
782 <param name="type_method" value="training"/> | |
783 <param name="annotation_file" value= "pixel_annotation_file1.tabular" ftype="tabular"/> | |
784 <param name="column_x" value="1"/> | |
785 <param name="column_y" value="2"/> | |
786 <param name="column_response" value="4"/> | |
787 <param name="tabular_header" value="False"/> | |
788 <conditional name="method_cond"> | |
789 <param name="class_method" value="PLS"/> | |
790 <conditional name="analysis_cond"> | |
791 <param name="PLS_method" value="PLS_analysis"/> | |
792 <param name="pls_comp" value="2"/> | |
793 <param name="pls_scale" value="TRUE"/> | |
794 <param name="pls_toplabels" value="100"/> | |
795 </conditional> | |
796 </conditional> | |
797 </conditional> | |
798 <param name="output_rdata" value="True"/> | |
799 <output name="mzfeatures" file="features_test2.tabular"/> | |
800 <output name="pixeloutput" file="pixels_test2.tabular"/> | |
801 <output name="classification_images" file="test2.pdf" compare="sim_size"/> | |
802 <output name="classification_rdata" file="test2.rdata" compare="sim_size"/> | |
803 </test> | |
804 | |
805 <test expect_num_outputs="3"> | |
806 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
807 <conditional name="type_cond"> | |
808 <param name="type_method" value="training"/> | |
809 <param name="annotation_file" value= "random_factors.tabular" ftype="tabular"/> | |
810 <param name="column_x" value="1"/> | |
811 <param name="column_y" value="2"/> | |
812 <param name="column_response" value="4"/> | |
813 <param name="column_fold" value="3"/> | |
814 <param name="tabular_header" value="False"/> | |
815 <conditional name="method_cond"> | |
816 <param name="class_method" value="OPLS"/> | |
817 <conditional name="opls_analysis_cond"> | |
818 <param name="opls_method" value="opls_cvapply"/> | |
819 <param name="opls_cvcomp" value="1:2"/> | |
820 <param name="xnew_cv" value="FALSE"/> | |
821 </conditional> | |
822 </conditional> | |
823 </conditional> | |
824 <output name="mzfeatures" file="features_test3.tabular"/> | |
825 <output name="pixeloutput" file="pixels_test3.tabular"/> | |
826 <output name="classification_images" file="test3.pdf" compare="sim_size"/> | |
827 </test> | |
828 | |
829 <test expect_num_outputs="4"> | |
830 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
831 <conditional name="type_cond"> | |
832 <param name="type_method" value="training"/> | |
833 <param name="annotation_file" value= "random_factors.tabular" ftype="tabular"/> | |
834 <param name="column_x" value="1"/> | |
835 <param name="column_y" value="2"/> | |
836 <param name="column_response" value="4"/> | |
837 <param name="tabular_header" value="False"/> | |
838 <conditional name="method_cond"> | |
839 <param name="class_method" value="OPLS"/> | |
840 <conditional name="opls_analysis_cond"> | |
841 <param name="opls_method" value="opls_analysis"/> | |
842 <param name="opls_comp" value="3"/> | |
843 <param name="xnew" value="FALSE"/> | |
844 <param name="opls_scale" value="FALSE"/> | |
845 <param name="opls_toplabels" value="100"/> | |
846 </conditional> | |
847 </conditional> | |
848 </conditional> | |
849 <param name="output_rdata" value="True"/> | |
850 <output name="mzfeatures" file="features_test4.tabular"/> | |
851 <output name="pixeloutput" file="pixels_test4.tabular"/> | |
852 <output name="classification_images" file="test4.pdf" compare="sim_size"/> | |
853 <output name="classification_rdata" file="test4.rdata" compare="sim_size"/> | |
854 </test> | |
855 | |
856 <test expect_num_outputs="3"> | |
857 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
858 <conditional name="type_cond"> | |
859 <param name="type_method" value="training"/> | |
860 <param name="annotation_file" value= "pixel_annotation_file1.tabular" ftype="tabular"/> | |
861 <param name="column_x" value="1"/> | |
862 <param name="column_y" value="2"/> | |
863 <param name="column_response" value="3"/> | |
864 <param name="column_fold" value="4"/> | |
865 <param name="tabular_header" value="False"/> | |
866 <conditional name="method_cond"> | |
867 <param name="class_method" value="spatialShrunkenCentroids"/> | |
868 <conditional name="ssc_analysis_cond"> | |
869 <param name="ssc_method" value="ssc_cvapply"/> | |
870 <param name="ssc_r" value="1:2"/> | |
871 <param name="ssc_s" value="2:3"/> | |
872 <param name="ssc_kernel_method" value="adaptive"/> | |
873 </conditional> | |
874 </conditional> | |
875 </conditional> | |
876 <output name="mzfeatures" file="features_test5.tabular"/> | |
877 <output name="pixeloutput" file="pixels_test5.tabular"/> | |
878 <output name="classification_images" file="test5.pdf" compare="sim_size"/> | |
879 </test> | |
880 | |
881 <test expect_num_outputs="4"> | |
882 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
883 <conditional name="type_cond"> | |
884 <param name="type_method" value="training"/> | |
885 <param name="annotation_file" value= "random_factors.tabular" ftype="tabular"/> | |
886 <param name="column_x" value="1"/> | |
887 <param name="column_y" value="2"/> | |
888 <param name="column_response" value="4"/> | |
889 <conditional name="method_cond"> | |
890 <param name="class_method" value="spatialShrunkenCentroids"/> | |
891 <conditional name="ssc_analysis_cond"> | |
892 <param name="ssc_method" value="ssc_analysis"/> | |
893 <param name="ssc_toplabels" value="20"/> | |
894 </conditional> | |
895 <param name="ssc_r" value="2"/> | |
896 <param name="ssc_s" value="2"/> | |
897 <param name="ssc_kernel_method" value="adaptive"/> | |
898 </conditional> | |
899 </conditional> | |
900 <param name="output_rdata" value="True"/> | |
901 <output name="mzfeatures" file="features_test6.tabular"/> | |
902 <output name="pixeloutput" file="pixels_test6.tabular"/> | |
903 <output name="classification_images" file="test6.pdf" compare="sim_size"/> | |
904 <output name="classification_rdata" file="test6.rdata" compare="sim_size" /> | |
905 </test> | |
906 | |
907 <test expect_num_outputs="4"> | |
908 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
909 <conditional name="type_cond"> | |
910 <param name="type_method" value="prediction"/> | |
911 <param name="training_result" value="test2.rdata" ftype="rdata"/> | |
912 <conditional name="new_y_values_cond"> | |
913 <param name="new_y_values" value="new_response"/> | |
914 <param name="new_response_file" value="pixel_annotation_file1.tabular" ftype="tabular"/> | |
915 <param name="column_new_x" value="1"/> | |
916 <param name="column_new_y" value="2"/> | |
917 <param name="column_new_response" value="4"/> | |
918 <param name="new_tabular_header" value="False"/> | |
919 </conditional> | |
920 </conditional> | |
921 <param name="output_rdata" value="True"/> | |
922 <output name="mzfeatures" file="features_test7.tabular"/> | |
923 <output name="pixeloutput" file="pixels_test7.tabular"/> | |
924 <output name="classification_images" file="test7.pdf" compare="sim_size"/> | |
925 <output name="classification_rdata" file="test7.rdata" compare="sim_size" /> | |
926 </test> | |
927 </tests> | |
928 <help> | |
929 <![CDATA[ | |
930 | |
931 | |
932 @CARDINAL_DESCRIPTION@ | |
933 | |
934 ----- | |
935 | |
936 This tool provides three different Cardinal functions for supervised classification of mass-spectrometry imaging data. | |
937 | |
938 @MSIDATA_INPUT_DESCRIPTION@ | |
939 - For training: tabular file with condition and fold for each pixel: two columns for pixel coordinates (x and y values); one column with the condition of the pixel, which will be used for classification; for the cross validation (cvapply) an additional column with a fold is necessary. Each fold must contain pixels of all response groups. Condition and fold columns are treated as factors to perform discriminant analysis (even when numeric values are provided). | |
940 | |
941 :: | |
942 | |
943 x_coord y_coord condition fold | |
944 1 1 A f1 | |
945 2 1 A f2 | |
946 3 1 A f3 | |
947 1 2 B f1 | |
948 2 2 B f2 | |
949 3 2 B f3 | |
950 ... | |
951 ... | |
952 | |
953 | |
954 - For prediction: the RData output from a previous classification run is needed as input. Optionally, new response values can be loaded from a tabular file containing x values, y values and the response. | |
955 | |
956 | |
957 **Options** | |
958 | |
959 - PLS-DA: partial least squares discriminant analysis | |
960 - OPLS-DA: orthogonal partial least squares discriminant analysis | |
961 - Spatial shrunken centroids | |
962 | |
963 **Tips** | |
964 | |
965 - The classification function will only run on files with valid intensity values (NA are not allowed) | |
966 - Only a single input file is accepted; multiple files have to be combined beforehand, for example with the msi_combine tool. | |
967 | |
968 | |
969 **Output** | |
970 | |
971 - PDF with the heatmaps and plots for the classification | |
972 - Tabular file with information on m/z features and pixels: toplabels/classes | |
973 - Optional: RData output that can be used to predict new data or to explore the results more deeply with the Cardinal package in R | |
974 | |
975 ]]> | |
976 </help> | |
977 <expand macro="citations"/> | |
978 </tool> |