Mercurial > repos > galaxyp > cardinal_classification
comparison classification.xml @ 4:47fc5b518ffc draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit ecdc3a64aa245d80dbc5487b2bf10a85a43adc6d
author | galaxyp |
---|---|
date | Fri, 22 Mar 2019 08:13:29 -0400 |
parents | 585ef27873c9 |
children | 6f4c34f8d5ba |
comparison
equal
deleted
inserted
replaced
3:585ef27873c9 | 4:47fc5b518ffc |
---|---|
1 <tool id="cardinal_classification" name="MSI classification" version="@VERSION@.2"> | 1 <tool id="cardinal_classification" name="MSI classification" version="@VERSION@.3"> |
2 <description>spatial classification of mass spectrometry imaging data</description> | 2 <description>spatial classification of mass spectrometry imaging data</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="requirements"> | 6 <expand macro="requirements"> |
28 library(lattice) | 28 library(lattice) |
29 library(ggplot2) | 29 library(ggplot2) |
30 | 30 |
31 @READING_MSIDATA_INRAM@ | 31 @READING_MSIDATA_INRAM@ |
32 | 32 |
33 ## to make sure that processed files work as well: | |
34 iData(msidata) = iData(msidata)[] | |
35 | 33 |
36 ## remove duplicated coordinates | 34 ## remove duplicated coordinates |
37 print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed")) | |
38 msidata <- msidata[,!duplicated(coord(msidata))] | 35 msidata <- msidata[,!duplicated(coord(msidata))] |
39 | 36 |
40 @DATA_PROPERTIES_INRAM@ | 37 @DATA_PROPERTIES_INRAM@ |
41 | 38 |
42 | 39 |
63 | 60 |
64 ## table with values | 61 ## table with values |
65 grid.table(property_df, rows= NULL) | 62 grid.table(property_df, rows= NULL) |
66 | 63 |
67 | 64 |
68 if (npeaks > 0 && sum(is.na(spectra(msidata)[]))==0){ | 65 if (npeaks > 0 && sum(is.na(spectra(msidata)))==0){ |
69 | 66 |
70 opar <- par() | 67 opar <- par() |
71 | 68 |
72 ######################## II) Training ############################# | 69 ######################## II) Training ############################# |
73 ############################################################################# | 70 ############################################################################# |
174 plot(components, accuracy_vector, ylab = "mean accuracy",type="o", main="Mean accuracy of PLS classification") | 171 plot(components, accuracy_vector, ylab = "mean accuracy",type="o", main="Mean accuracy of PLS classification") |
175 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy | 172 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy |
176 ## one image for each sample/fold, 4 images per page | 173 ## one image for each sample/fold, 1 image per page |
177 minimumy = min(coord(msidata.cv.pls)[,2]) | 174 minimumy = min(coord(msidata.cv.pls)[,2]) |
178 maximumy = max(coord(msidata.cv.pls)[,2]) | 175 maximumy = max(coord(msidata.cv.pls)[,2]) |
179 image(msidata.cv.pls, model = list(ncomp = ncomp_max),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout = c(2, 2)) | 176 image(msidata.cv.pls, model = list(ncomp = ncomp_max),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout = c(1, 1)) |
180 | 177 |
181 ## print table with summary in pdf | 178 ## print table with summary in pdf |
182 par(opar) | 179 par(opar) |
183 plot(0,type='n',axes=FALSE,ann=FALSE) | 180 plot(0,type='n',axes=FALSE,ann=FALSE) |
184 title(main="Summary for the different components\n", adj=0.5) | 181 title(main="Summary for the different components\n", adj=0.5) |
212 | 209 |
213 ## set variables for components and number of response groups | 210 ## set variables for components and number of response groups |
214 component = c($type_cond.method_cond.analysis_cond.pls_comp) | 211 component = c($type_cond.method_cond.analysis_cond.pls_comp) |
215 number_groups = length(levels(y_vector)) | 212 number_groups = length(levels(y_vector)) |
216 | 213 |
214 ### stop if multiple values for PLS components are selected, which sets component to 0 | |
215 tryCatch( | |
216 { | |
217 | |
218 if (component==0) | |
219 { | |
220 stop(call.=FALSE) | |
221 } | |
222 }, | |
223 error=function(cond) { | |
224 ## in case user used multiple inputs for component - this is only possible in cv apply | |
225 message("Error during PLS training") | |
226 message("Possible problems: Multiple values for component were selected - this is only possible in cvapply but not for PLS analysis or component was set to 0 but minimum for component is 1)") | |
227 stop(call.=FALSE) | |
228 } | |
229 ) | |
230 | |
217 ### pls analysis and coefficients plot | 231 ### pls analysis and coefficients plot |
218 msidata.pls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.analysis_cond.pls_scale) | 232 msidata.pls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.analysis_cond.pls_scale) |
219 plot(msidata.pls, main="PLS coefficients per m/z") | 233 plot(msidata.pls, main="PLS coefficients per m/z") |
220 | 234 |
221 ### summary table of PLS | 235 ### summary table of PLS |
246 ## remove msidata to clean up RAM space | 260 ## remove msidata to clean up RAM space |
247 rm(msidata) | 261 rm(msidata) |
248 gc() | 262 gc() |
249 pls_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, pls_classes) | 263 pls_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, pls_classes) |
250 colnames(pls_classes2) = c("pixel names", "x", "y","predicted condition") | 264 colnames(pls_classes2) = c("pixel names", "x", "y","predicted condition") |
251 pls_toplabels = topLabels(msidata.pls, n=$type_cond.method_cond.analysis_cond.pls_toplabels) | 265 pls_toplabels = topLabels(msidata.pls, n=Inf) |
252 pls_toplabels[,4:6] <-round(pls_toplabels[,4:6],6) | 266 pls_toplabels[,4:6] <-round(pls_toplabels[,4:6],6) |
253 write.table(pls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 267 write.table(pls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
254 write.table(pls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 268 write.table(pls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
255 | 269 |
256 ## image with predicted classes | 270 ## image with predicted classes |
287 ## set variables for components and number of response groups | 301 ## set variables for components and number of response groups |
288 components = c($type_cond.method_cond.opls_analysis_cond.opls_cvcomp) | 302 components = c($type_cond.method_cond.opls_analysis_cond.opls_cvcomp) |
289 number_groups = length(levels(y_vector)) | 303 number_groups = length(levels(y_vector)) |
290 | 304 |
291 ## OPLS-cvApply: | 305 ## OPLS-cvApply: |
292 msidata.cv.opls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "OPLS", ncomp = components, keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew_cv) | 306 msidata.cv.opls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "OPLS", ncomp = components) |
307 ## for use to reduce msidata: keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew_cv | |
293 | 308 |
294 ## remove msidata to clean up RAM space | 309 ## remove msidata to clean up RAM space |
295 rm(msidata) | 310 rm(msidata) |
296 gc() | 311 gc() |
297 | 312 |
320 plot(components, accuracy_vector, ylab = "mean accuracy", type="o", main="Mean accuracy of OPLS classification") | 335 plot(components, accuracy_vector, ylab = "mean accuracy", type="o", main="Mean accuracy of OPLS classification") |
321 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy | 336 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy |
322 ## one image for each sample/fold, 4 images per page | 337 ## one image for each sample/fold, 1 image per page |
323 minimumy = min(coord(msidata.cv.opls)[,2]) | 338 minimumy = min(coord(msidata.cv.opls)[,2]) |
324 maximumy = max(coord(msidata.cv.opls)[,2]) | 339 maximumy = max(coord(msidata.cv.opls)[,2]) |
325 image(msidata.cv.opls, model = list(ncomp = ncomp_max),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout = c(2, 2)) | 340 image(msidata.cv.opls, model = list(ncomp = ncomp_max),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout = c(1, 1)) |
326 | 341 |
327 ## print table with summary in pdf | 342 ## print table with summary in pdf |
328 par(opar) | 343 par(opar) |
329 plot(0,type='n',axes=FALSE,ann=FALSE) | 344 plot(0,type='n',axes=FALSE,ann=FALSE) |
330 title(main="Summary for the different components\n", adj=0.5) | 345 title(main="Summary for the different components\n", adj=0.5) |
358 | 373 |
359 ## set variables for components and number of response groups | 374 ## set variables for components and number of response groups |
360 component = c($type_cond.method_cond.opls_analysis_cond.opls_comp) | 375 component = c($type_cond.method_cond.opls_analysis_cond.opls_comp) |
361 number_groups = length(levels(y_vector)) | 376 number_groups = length(levels(y_vector)) |
362 | 377 |
378 ### stop if multiple values for OPLS components are selected, which sets component to 0 | |
379 tryCatch( | |
380 { | |
381 | |
382 if (component==0) | |
383 { | |
384 stop(call.=FALSE) | |
385 } | |
386 }, | |
387 error=function(cond) { | |
388 ## in case user used multiple inputs for component - this is only possible in cv apply | |
389 message("Error during OPLS training") | |
390 message("Possible problems: Multiple values for component were selected - this is only possible in cvapply but not for OPLS analysis or component was set to 0 but minimum for component is 1)") | |
391 stop(call.=FALSE) | |
392 } | |
393 ) | |
363 | 394 |
364 ### opls analysis and coefficients plot | 395 ### opls analysis and coefficients plot |
365 msidata.opls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.opls_analysis_cond.opls_scale, keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew) | 396 msidata.opls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.opls_analysis_cond.opls_scale) |
397 ## to reduce msidata: keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew | |
366 plot(msidata.opls, main="OPLS coefficients per m/z") | 398 plot(msidata.opls, main="OPLS coefficients per m/z") |
399 | |
367 | 400 |
368 ### summary table of OPLS | 401 ### summary table of OPLS |
369 summary_table = summary(msidata.opls)\$accuracy[[paste0("ncomp = ",component)]] | 402 summary_table = summary(msidata.opls)\$accuracy[[paste0("ncomp = ",component)]] |
370 summary_table2 = round(as.numeric(summary_table), digits=2) | 403 summary_table2 = round(as.numeric(summary_table), digits=2) |
371 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups) | 404 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups) |
394 | 427 |
395 ## remove msidata to clean up RAM space | 428 ## remove msidata to clean up RAM space |
396 rm(msidata) | 429 rm(msidata) |
397 gc() | 430 gc() |
398 | 431 |
399 opls_toplabels = topLabels(msidata.opls, n=$type_cond.method_cond.opls_analysis_cond.opls_toplabels) | 432 opls_toplabels = topLabels(msidata.opls, n=Inf) |
400 opls_toplabels[,4:6] <-round(opls_toplabels[,4:6],6) | 433 opls_toplabels[,4:6] <-round(opls_toplabels[,4:6],6) |
401 write.table(opls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 434 write.table(opls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
402 write.table(opls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 435 write.table(opls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
403 | 436 |
404 ## image with predicted classes | 437 ## image with predicted classes |
469 best_params = names(msidata.cv.ssc@resultData[[1]][,1])[which.max(accuracy_vector)] ## find parameters with max. accuracy | 502 best_params = names(msidata.cv.ssc@resultData[[1]][,1])[which.max(accuracy_vector)] ## find parameters with max. accuracy |
470 r_value = as.numeric(substring(unlist(strsplit(best_params, ","))[1], 4)) | 503 r_value = as.numeric(substring(unlist(strsplit(best_params, ","))[1], 4)) |
471 s_value = as.numeric(substring(unlist(strsplit(best_params, ","))[3], 5)) ## remove space | 504 s_value = as.numeric(substring(unlist(strsplit(best_params, ","))[3], 5)) ## remove space |
472 minimumy = min(coord(msidata.cv.ssc)[,2]) | 505 minimumy = min(coord(msidata.cv.ssc)[,2]) |
473 maximumy = max(coord(msidata.cv.ssc)[,2]) | 506 maximumy = max(coord(msidata.cv.ssc)[,2]) |
474 image(msidata.cv.ssc, model = list( r = r_value, s = s_value ), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout=c(2,2)) | 507 image(msidata.cv.ssc, model = list( r = r_value, s = s_value ), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout=c(1,1)) |
475 | 508 |
476 ## print table with summary in pdf | 509 ## print table with summary in pdf |
477 par(opar) | 510 par(opar) |
478 plot(0,type='n',axes=FALSE,ann=FALSE) | 511 plot(0,type='n',axes=FALSE,ann=FALSE) |
479 title(main="Summary for the different parameters\n", adj=0.5) | 512 title(main="Summary for the different parameters\n", adj=0.5) |
512 r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") | 545 r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") |
513 plot(msidata.ssc, mode = "tstatistics", model = list("r" = c($type_cond.method_cond.ssc_r), "s" = c($type_cond.method_cond.ssc_s))) | 546 plot(msidata.ssc, mode = "tstatistics", model = list("r" = c($type_cond.method_cond.ssc_r), "s" = c($type_cond.method_cond.ssc_s))) |
514 | 547 |
515 ### summary table SSC | 548 ### summary table SSC |
516 ##############summary_table = summary(msidata.ssc) | 549 ##############summary_table = summary(msidata.ssc) |
550 | |
551 ### stop if multiple values for r and s were used as input | |
552 tryCatch( | |
553 { | |
554 | |
555 if (length(names(msidata.ssc@resultData))>1) | |
556 { | |
557 stop(call.=FALSE) | |
558 } | |
559 }, | |
560 error=function(cond) { | |
561 ## in case user used multiple inputs for r or s stop - this is only possible in cv apply | |
562 message("Error during SSC training") | |
563 message("Possible problem: multiple values for r or s selected - this is only possible in cvapply but not for spatial shrunken centroid analysis)") | |
564 stop(call.=FALSE) | |
565 } | |
566 ) | |
567 | |
517 summary_table = summary(msidata.ssc)\$accuracy[[names(msidata.ssc@resultData)]] | 568 summary_table = summary(msidata.ssc)\$accuracy[[names(msidata.ssc@resultData)]] |
518 summary_table2 = round(as.numeric(summary_table), digits=2) | 569 summary_table2 = round(as.numeric(summary_table), digits=2) |
519 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups) | 570 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups) |
520 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table | 571 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table |
521 summary_table4 = t(summary_table3) | 572 summary_table4 = t(summary_table3) |
544 rm(msidata) | 595 rm(msidata) |
545 gc() | 596 gc() |
546 | 597 |
547 ssc_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, ssc_classes) | 598 ssc_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, ssc_classes) |
548 colnames(ssc_classes2) = c("pixel names", "x", "y","predicted condition") | 599 colnames(ssc_classes2) = c("pixel names", "x", "y","predicted condition") |
549 ssc_toplabels = topLabels(msidata.ssc, n=$type_cond.method_cond.ssc_analysis_cond.ssc_toplabels) | 600 ssc_toplabels = topLabels(msidata.ssc, n=Inf) |
550 ssc_toplabels[,6:9] <-round(ssc_toplabels[,6:9],6) | 601 ssc_toplabels[,6:9] <-round(ssc_toplabels[,6:9],6) |
551 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 602 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
552 write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 603 write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
553 | 604 |
554 ## image with predicted classes | 605 ## image with predicted classes |
611 pixel_names = gsub(" = ", "y_", pixel_names) | 662 pixel_names = gsub(" = ", "y_", pixel_names) |
612 x_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,2] | 663 x_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,2] |
613 y_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,3] | 664 y_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,3] |
614 predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes) | 665 predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes) |
615 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition") | 666 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition") |
616 predicted_toplabels = topLabels(prediction, n=$type_cond.predicted_toplabels) | 667 predicted_toplabels = topLabels(prediction, n=Inf) |
617 if (colnames(predicted_toplabels)[4] == "coefficients"){ | 668 if (colnames(predicted_toplabels)[4] == "coefficients"){ |
618 predicted_toplabels[,4:6] <-round(predicted_toplabels[,4:6],5) | 669 predicted_toplabels[,4:6] <-round(predicted_toplabels[,4:6],5) |
619 | 670 |
620 }else{ | 671 }else{ |
621 predicted_toplabels[,6:9] <-round(predicted_toplabels[,6:9],5)} | 672 predicted_toplabels[,6:9] <-round(predicted_toplabels[,6:9],5)} |
697 <option value="cvapply" selected="True">cvApply</option> | 748 <option value="cvapply" selected="True">cvApply</option> |
698 <option value="PLS_analysis">PLS-DA analysis</option> | 749 <option value="PLS_analysis">PLS-DA analysis</option> |
699 </param> | 750 </param> |
700 <when value="cvapply"> | 751 <when value="cvapply"> |
701 <param name="plscv_comp" type="text" value="1:2" | 752 <param name="plscv_comp" type="text" value="1:2" |
702 label="The number of PLS-DA components" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"> | 753 label="The number of PLS-DA components" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5). Minimum is 1."> |
703 <expand macro="sanitizer_multiple_digits"/> | 754 <expand macro="sanitizer_multiple_digits"/> |
704 </param> | 755 </param> |
705 </when> | 756 </when> |
706 <when value="PLS_analysis"> | 757 <when value="PLS_analysis"> |
707 <param name="pls_comp" type="integer" value="5" | 758 <param name="pls_comp" type="integer" value="5" |
708 label="The optimal number of PLS-DA components as indicated by cross-validations" help="Run cvApply first to optain optiaml number of PLS-DA components"/> | 759 label="The optimal number of PLS-DA components as indicated by cross-validations (minimum is 1)" help="Run cvApply first to obtain the optimal number of PLS-DA components"/> |
709 <param name="pls_scale" type="boolean" label="Data scaling" truevalue="TRUE" falsevalue="FALSE"/> | 760 <param name="pls_scale" type="boolean" label="Data scaling" truevalue="TRUE" falsevalue="FALSE"/> |
710 <param name="pls_toplabels" type="integer" value="100" | 761 <param name="pls_toplabels" type="integer" value="100" |
711 label="Number of toplabels (m/z features) which should be written in tabular output"/> | 762 label="Number of toplabels (m/z features) which should be written in tabular output"/> |
712 </when> | 763 </when> |
713 </conditional> | 764 </conditional> |
721 <option value="opls_analysis">OPLS-DA analysis</option> | 772 <option value="opls_analysis">OPLS-DA analysis</option> |
722 </param> | 773 </param> |
723 | 774 |
724 <when value="opls_cvapply"> | 775 <when value="opls_cvapply"> |
725 <param name="opls_cvcomp" type="text" value="1:2" | 776 <param name="opls_cvcomp" type="text" value="1:2" |
726 label="The number of OPLS-DA components" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"> | 777 label="The number of OPLS-DA components" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5). Minimum is 1."> |
727 <expand macro="sanitizer_multiple_digits"/> | 778 <expand macro="sanitizer_multiple_digits"/> |
728 </param> | 779 </param> |
729 <param name="xnew_cv" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/> | 780 <!--param name="xnew_cv" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/--> |
730 </when> | 781 </when> |
731 | 782 |
732 <when value="opls_analysis"> | 783 <when value="opls_analysis"> |
733 <param name="opls_comp" type="integer" value="5" | 784 <param name="opls_comp" type="integer" value="5" |
734 label="The optimal number of OPLS-DA components as indicated by cross-validations" help="Run cvApply first to optain optiaml number of OPLS-DA components"/> | 785 label="The optimal number of OPLS-DA components as indicated by cross-validations (minimum is 1)" help="Run cvApply first to obtain the optimal number of OPLS-DA components"/> |
735 <param name="xnew" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/> | 786 <!--param name="xnew" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/--> |
736 <param name="opls_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Data scaling"/> | 787 <param name="opls_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Data scaling"/> |
737 <param name="opls_toplabels" type="integer" value="100" | 788 <!--param name="opls_toplabels" type="integer" value="100" |
738 label="Number of toplabels (m/z features) which should be written in tabular output"/> | 789 label="Number of toplabels (m/z features) which should be written in tabular output"/--> |
739 </when> | 790 </when> |
740 </conditional> | 791 </conditional> |
741 </when> | 792 </when> |
742 | 793 |
743 <when value="spatialShrunkenCentroids"> | 794 <when value="spatialShrunkenCentroids"> |
747 <option value="ssc_analysis">spatial shrunken centroids analysis</option> | 798 <option value="ssc_analysis">spatial shrunken centroids analysis</option> |
748 </param> | 799 </param> |
749 <when value="ssc_cvapply"/> | 800 <when value="ssc_cvapply"/> |
750 | 801 |
751 <when value="ssc_analysis"> | 802 <when value="ssc_analysis"> |
752 <param name="ssc_toplabels" type="integer" value="100" | 803 <!--param name="ssc_toplabels" type="integer" value="100" |
753 label="Number of toplabels (m/z features) which should be written in tabular output"/> | 804 label="Number of toplabels (m/z features) which should be written in tabular output"/--> |
754 </when> | 805 </when> |
755 </conditional> | 806 </conditional> |
756 <param name="ssc_r" type="text" value="2" | 807 <param name="ssc_r" type="text" value="2" |
757 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"> | 808 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="For cvapply multiple values are allowed (e.g. 0,1,2,3 or 2:5)"> |
758 <expand macro="sanitizer_multiple_digits"/> | 809 <expand macro="sanitizer_multiple_digits"/> |
759 </param> | 810 </param> |
760 <param name="ssc_s" type="text" value="2" | 811 <param name="ssc_s" type="text" value="2" |
761 label="The sparsity thresholding parameter by which to shrink the t-statistics (s)" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"> | 812 label="The sparsity thresholding parameter by which to shrink the t-statistics (s)." help="For cvapply multiple values are allowed (e.g. 0,1,2 or 2:5)"> |
762 <expand macro="sanitizer_multiple_digits"/> | 813 <expand macro="sanitizer_multiple_digits"/> |
763 </param> | 814 </param> |
764 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights"> | 815 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights"> |
765 <option value="gaussian">gaussian</option> | 816 <option value="gaussian">gaussian</option> |
766 <option value="adaptive" selected="True">adaptive</option> | 817 <option value="adaptive" selected="True">adaptive</option> |
770 </conditional> | 821 </conditional> |
771 </when> | 822 </when> |
772 | 823 |
773 <when value="prediction"> | 824 <when value="prediction"> |
774 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/> | 825 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/> |
775 <param name="predicted_toplabels" type="integer" value="100" | 826 <!--param name="predicted_toplabels" type="integer" value="100" |
776 label="Number of toplabels (m/z features) which should be written in tabular output"/> | 827 label="Number of toplabels (m/z features) which should be written in tabular output"/--> |
777 <conditional name="new_y_values_cond"> | 828 <conditional name="new_y_values_cond"> |
778 <param name="new_y_values" type="select" label="Should new response values be used"> | 829 <param name="new_y_values" type="select" label="Should new response values be used"> |
779 <option value="no_new_response" selected="True">old response should be used</option> | 830 <option value="no_new_response" selected="True">old response should be used</option> |
780 <option value="new_response">load new response from tabular file</option> | 831 <option value="new_response">load new response from tabular file</option> |
781 </param> | 832 </param> |
837 <param name="class_method" value="PLS"/> | 888 <param name="class_method" value="PLS"/> |
838 <conditional name="analysis_cond"> | 889 <conditional name="analysis_cond"> |
839 <param name="PLS_method" value="PLS_analysis"/> | 890 <param name="PLS_method" value="PLS_analysis"/> |
840 <param name="pls_comp" value="2"/> | 891 <param name="pls_comp" value="2"/> |
841 <param name="pls_scale" value="TRUE"/> | 892 <param name="pls_scale" value="TRUE"/> |
842 <param name="pls_toplabels" value="100"/> | 893 <!--param name="pls_toplabels" value="100"/--> |
843 </conditional> | 894 </conditional> |
844 </conditional> | 895 </conditional> |
845 </conditional> | 896 </conditional> |
846 <param name="output_rdata" value="True"/> | 897 <param name="output_rdata" value="True"/> |
847 <output name="mzfeatures" file="features_test2.tabular"/> | 898 <output name="mzfeatures" file="features_test2.tabular"/> |
888 <conditional name="opls_analysis_cond"> | 939 <conditional name="opls_analysis_cond"> |
889 <param name="opls_method" value="opls_analysis"/> | 940 <param name="opls_method" value="opls_analysis"/> |
890 <param name="opls_comp" value="3"/> | 941 <param name="opls_comp" value="3"/> |
891 <param name="xnew" value="FALSE"/> | 942 <param name="xnew" value="FALSE"/> |
892 <param name="opls_scale" value="FALSE"/> | 943 <param name="opls_scale" value="FALSE"/> |
893 <param name="opls_toplabels" value="100"/> | 944 <!--param name="opls_toplabels" value="100"/--> |
894 </conditional> | 945 </conditional> |
895 </conditional> | 946 </conditional> |
896 </conditional> | 947 </conditional> |
897 <param name="output_rdata" value="True"/> | 948 <param name="output_rdata" value="True"/> |
898 <output name="mzfeatures" file="features_test4.tabular"/> | 949 <output name="mzfeatures" file="features_test4.tabular"/> |
936 <param name="column_response" value="4"/> | 987 <param name="column_response" value="4"/> |
937 <conditional name="method_cond"> | 988 <conditional name="method_cond"> |
938 <param name="class_method" value="spatialShrunkenCentroids"/> | 989 <param name="class_method" value="spatialShrunkenCentroids"/> |
939 <conditional name="ssc_analysis_cond"> | 990 <conditional name="ssc_analysis_cond"> |
940 <param name="ssc_method" value="ssc_analysis"/> | 991 <param name="ssc_method" value="ssc_analysis"/> |
941 <param name="ssc_toplabels" value="20"/> | 992 <!--param name="ssc_toplabels" value="20"/--> |
942 </conditional> | 993 </conditional> |
943 <param name="ssc_r" value="2"/> | 994 <param name="ssc_r" value="2"/> |
944 <param name="ssc_s" value="2"/> | 995 <param name="ssc_s" value="2"/> |
945 <param name="ssc_kernel_method" value="adaptive"/> | 996 <param name="ssc_kernel_method" value="adaptive"/> |
946 </conditional> | 997 </conditional> |
982 ----- | 1033 ----- |
983 | 1034 |
984 This tool provides three different Cardinal functions for supervised classification of mass-spectrometry imaging data. | 1035 This tool provides three different Cardinal functions for supervised classification of mass-spectrometry imaging data. |
985 | 1036 |
986 @MSIDATA_INPUT_DESCRIPTION@ | 1037 @MSIDATA_INPUT_DESCRIPTION@ |
1038 - NA intensities are not allowed | |
1039 - duplicated coordinates will be removed | |
1040 | |
987 - For training: tabular file with condition and fold for each pixel: Two columns for pixel coordinates (x and y values); one column with the condition for the pixel, which will be used for classification; for the cross validation (cvapply) another column with a fold is necessary, each fold must contain pixels of all response groups and is used for cross validation. Condition and fold columns are treated as factor to perform discriminant analysis (also when numeric values are provided). | 1041 - For training: tabular file with condition and fold for each pixel: Two columns for pixel coordinates (x and y values); one column with the condition for the pixel, which will be used for classification; for the cross validation (cvapply) another column with a fold is necessary, each fold must contain pixels of all response groups and is used for cross validation. Condition and fold columns are treated as factor to perform discriminant analysis (also when numeric values are provided). |
988 | 1042 |
989 :: | 1043 :: |
990 | 1044 |
991 x_coord y_coord condition fold | 1045 x_coord y_coord condition fold |
1004 | 1058 |
1005 **Options** | 1059 **Options** |
1006 | 1060 |
1007 - PLS-DA: partial least square discriminant analysis | 1061 - PLS-DA: partial least square discriminant analysis |
1008 - O-PLS-DA: Orthogonal partial least squares discriminant analysis | 1062 - O-PLS-DA: Orthogonal partial least squares discriminant analysis |
1009 - Spatial shrunken centroids | 1063 - Spatial shrunken centroids (more details in `Bemis et al. <https://doi.org/10.1074/mcp.O115.053918>`_) |
1064 - training and prediction | |
1065 | |
1066 - training can be done with cvapply, which uses cross validation to find the best value for s; this requires not only a condition for each spectrum but also a fold (each fold should contain spectra of all conditions) | |
1067 - training with the best value for s gives the top m/z features for each condition and the predicted classification group for each spectrum | |
1068 - training result can be saved as RData file that can be reused for prediction of further samples | |
1069 | |
1070 | |
1071 .. image:: $PATH_TO_IMAGES/classification_overview.png | |
1072 :width: 1000 | |
1073 :height: 465 | |
1074 | |
1075 | |
1010 | 1076 |
1011 **Tips** | 1077 **Tips** |
1012 | 1078 |
1013 - The classification function will only run on files with valid intensity values (NA are not allowed) | 1079 - The classification function will only run on files with valid intensity values (NA are not allowed) |
1014 - Only a single input file is accepted, several files have to be combined previously, for example with the msi_combine tool. | 1080 - Only a single input file is accepted, several files have to be combined previously, for example with the MSI combine tool. |
1015 | 1081 |
1016 | 1082 |
1017 **Output** | 1083 **Output** |
1018 | 1084 |
1019 - Pdf with the heatmaps and plots for the classification | 1085 - Pdf with the heatmaps and plots for the classification |