comparison classification.xml @ 4:47fc5b518ffc draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit ecdc3a64aa245d80dbc5487b2bf10a85a43adc6d
author galaxyp
date Fri, 22 Mar 2019 08:13:29 -0400
parents 585ef27873c9
children 6f4c34f8d5ba
comparison
equal deleted inserted replaced
3:585ef27873c9 4:47fc5b518ffc
1 <tool id="cardinal_classification" name="MSI classification" version="@VERSION@.2"> 1 <tool id="cardinal_classification" name="MSI classification" version="@VERSION@.3">
2 <description>spatial classification of mass spectrometry imaging data</description> 2 <description>spatial classification of mass spectrometry imaging data</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"> 6 <expand macro="requirements">
28 library(lattice) 28 library(lattice)
29 library(ggplot2) 29 library(ggplot2)
30 30
31 @READING_MSIDATA_INRAM@ 31 @READING_MSIDATA_INRAM@
32 32
33 ## to make sure that processed files work as well:
34 iData(msidata) = iData(msidata)[]
35 33
36 ## remove duplicated coordinates 34 ## remove duplicated coordinates
37 print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed"))
38 msidata <- msidata[,!duplicated(coord(msidata))] 35 msidata <- msidata[,!duplicated(coord(msidata))]
39 36
40 @DATA_PROPERTIES_INRAM@ 37 @DATA_PROPERTIES_INRAM@
41 38
42 39
63 60
64 ## table with values 61 ## table with values
65 grid.table(property_df, rows= NULL) 62 grid.table(property_df, rows= NULL)
66 63
67 64
68 if (npeaks > 0 && sum(is.na(spectra(msidata)[]))==0){ 65 if (npeaks > 0 && sum(is.na(spectra(msidata)))==0){
69 66
70 opar <- par() 67 opar <- par()
71 68
72 ######################## II) Training ############################# 69 ######################## II) Training #############################
73 ############################################################################# 70 #############################################################################
174 plot(components, accuracy_vector, ylab = "mean accuracy",type="o", main="Mean accuracy of PLS classification") 171 plot(components, accuracy_vector, ylab = "mean accuracy",type="o", main="Mean accuracy of PLS classification")
175 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy 172 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy
176 ## one image for each sample/fold, 4 images per page 173 ## one image for each sample/fold, 1 image per page
177 minimumy = min(coord(msidata.cv.pls)[,2]) 174 minimumy = min(coord(msidata.cv.pls)[,2])
178 maximumy = max(coord(msidata.cv.pls)[,2]) 175 maximumy = max(coord(msidata.cv.pls)[,2])
179 image(msidata.cv.pls, model = list(ncomp = ncomp_max),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout = c(2, 2)) 176 image(msidata.cv.pls, model = list(ncomp = ncomp_max),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout = c(1, 1))
180 177
181 ## print table with summary in pdf 178 ## print table with summary in pdf
182 par(opar) 179 par(opar)
183 plot(0,type='n',axes=FALSE,ann=FALSE) 180 plot(0,type='n',axes=FALSE,ann=FALSE)
184 title(main="Summary for the different components\n", adj=0.5) 181 title(main="Summary for the different components\n", adj=0.5)
212 209
213 ## set variables for components and number of response groups 210 ## set variables for components and number of response groups
214 component = c($type_cond.method_cond.analysis_cond.pls_comp) 211 component = c($type_cond.method_cond.analysis_cond.pls_comp)
215 number_groups = length(levels(y_vector)) 212 number_groups = length(levels(y_vector))
216 213
214 ### stop if multiple values for PLS components are selected, which sets component to 0
215 tryCatch(
216 {
217
218 if (component==0)
219 {
220 stop(call.=FALSE)
221 }
222 },
223 error=function(cond) {
224 ## in case user used multiple inputs for component - this is only possible in cv apply
225 message("Error during PLS training")
226 message("Possible problems: Multiple values for component were selected - this is only possible in cvapply but not for PLS analysis or component was set to 0 but minimum for component is 1)")
227 stop(call.=FALSE)
228 }
229 )
230
217 ### pls analysis and coefficients plot 231 ### pls analysis and coefficients plot
218 msidata.pls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.analysis_cond.pls_scale) 232 msidata.pls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.analysis_cond.pls_scale)
219 plot(msidata.pls, main="PLS coefficients per m/z") 233 plot(msidata.pls, main="PLS coefficients per m/z")
220 234
221 ### summary table of PLS 235 ### summary table of PLS
246 ## remove msidata to clean up RAM space 260 ## remove msidata to clean up RAM space
247 rm(msidata) 261 rm(msidata)
248 gc() 262 gc()
249 pls_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, pls_classes) 263 pls_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, pls_classes)
250 colnames(pls_classes2) = c("pixel names", "x", "y","predicted condition") 264 colnames(pls_classes2) = c("pixel names", "x", "y","predicted condition")
251 pls_toplabels = topLabels(msidata.pls, n=$type_cond.method_cond.analysis_cond.pls_toplabels) 265 pls_toplabels = topLabels(msidata.pls, n=Inf)
252 pls_toplabels[,4:6] <-round(pls_toplabels[,4:6],6) 266 pls_toplabels[,4:6] <-round(pls_toplabels[,4:6],6)
253 write.table(pls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 267 write.table(pls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
254 write.table(pls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 268 write.table(pls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
255 269
256 ## image with predicted classes 270 ## image with predicted classes
287 ## set variables for components and number of response groups 301 ## set variables for components and number of response groups
288 components = c($type_cond.method_cond.opls_analysis_cond.opls_cvcomp) 302 components = c($type_cond.method_cond.opls_analysis_cond.opls_cvcomp)
289 number_groups = length(levels(y_vector)) 303 number_groups = length(levels(y_vector))
290 304
291 ## OPLS-cvApply: 305 ## OPLS-cvApply:
292 msidata.cv.opls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "OPLS", ncomp = components, keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew_cv) 306 msidata.cv.opls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "OPLS", ncomp = components)
307 ## for use to reduce msidata: keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew_cv
293 308
294 ## remove msidata to clean up RAM space 309 ## remove msidata to clean up RAM space
295 rm(msidata) 310 rm(msidata)
296 gc() 311 gc()
297 312
320 plot(components, accuracy_vector, ylab = "mean accuracy", type="o", main="Mean accuracy of OPLS classification") 335 plot(components, accuracy_vector, ylab = "mean accuracy", type="o", main="Mean accuracy of OPLS classification")
321 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy 336 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy
322 ## one image for each sample/fold, 4 images per page 337 ## one image for each sample/fold, 1 image per page
323 minimumy = min(coord(msidata.cv.opls)[,2]) 338 minimumy = min(coord(msidata.cv.opls)[,2])
324 maximumy = max(coord(msidata.cv.opls)[,2]) 339 maximumy = max(coord(msidata.cv.opls)[,2])
325 image(msidata.cv.opls, model = list(ncomp = ncomp_max),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout = c(2, 2)) 340 image(msidata.cv.opls, model = list(ncomp = ncomp_max),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout = c(1, 1))
326 341
327 ## print table with summary in pdf 342 ## print table with summary in pdf
328 par(opar) 343 par(opar)
329 plot(0,type='n',axes=FALSE,ann=FALSE) 344 plot(0,type='n',axes=FALSE,ann=FALSE)
330 title(main="Summary for the different components\n", adj=0.5) 345 title(main="Summary for the different components\n", adj=0.5)
358 373
359 ## set variables for components and number of response groups 374 ## set variables for components and number of response groups
360 component = c($type_cond.method_cond.opls_analysis_cond.opls_comp) 375 component = c($type_cond.method_cond.opls_analysis_cond.opls_comp)
361 number_groups = length(levels(y_vector)) 376 number_groups = length(levels(y_vector))
362 377
378 ### stop if multiple values for OPLS components are selected, which sets component to 0
379 tryCatch(
380 {
381
382 if (component==0)
383 {
384 stop(call.=FALSE)
385 }
386 },
387 error=function(cond) {
388 ## in case user used multiple inputs for component - this is only possible in cv apply
389 message("Error during OPLS training")
390 message("Possible problems: Multiple values for component were selected - this is only possible in cvapply but not for OPLS analysis or component was set to 0 but minimum for component is 1)")
391 stop(call.=FALSE)
392 }
393 )
363 394
364 ### opls analysis and coefficients plot 395 ### opls analysis and coefficients plot
365 msidata.opls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.opls_analysis_cond.opls_scale, keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew) 396 msidata.opls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.opls_analysis_cond.opls_scale)
397 ## to reduce msidata: keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew
366 plot(msidata.opls, main="OPLS coefficients per m/z") 398 plot(msidata.opls, main="OPLS coefficients per m/z")
399
367 400
368 ### summary table of OPLS 401 ### summary table of OPLS
369 summary_table = summary(msidata.opls)\$accuracy[[paste0("ncomp = ",component)]] 402 summary_table = summary(msidata.opls)\$accuracy[[paste0("ncomp = ",component)]]
370 summary_table2 = round(as.numeric(summary_table), digits=2) 403 summary_table2 = round(as.numeric(summary_table), digits=2)
371 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups) 404 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups)
394 427
395 ## remove msidata to clean up RAM space 428 ## remove msidata to clean up RAM space
396 rm(msidata) 429 rm(msidata)
397 gc() 430 gc()
398 431
399 opls_toplabels = topLabels(msidata.opls, n=$type_cond.method_cond.opls_analysis_cond.opls_toplabels) 432 opls_toplabels = topLabels(msidata.opls, n=Inf)
400 opls_toplabels[,4:6] <-round(opls_toplabels[,4:6],6) 433 opls_toplabels[,4:6] <-round(opls_toplabels[,4:6],6)
401 write.table(opls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 434 write.table(opls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
402 write.table(opls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 435 write.table(opls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
403 436
404 ## image with predicted classes 437 ## image with predicted classes
469 best_params = names(msidata.cv.ssc@resultData[[1]][,1])[which.max(accuracy_vector)] ## find parameters with max. accuracy 502 best_params = names(msidata.cv.ssc@resultData[[1]][,1])[which.max(accuracy_vector)] ## find parameters with max. accuracy
470 r_value = as.numeric(substring(unlist(strsplit(best_params, ","))[1], 4)) 503 r_value = as.numeric(substring(unlist(strsplit(best_params, ","))[1], 4))
471 s_value = as.numeric(substring(unlist(strsplit(best_params, ","))[3], 5)) ## remove space 504 s_value = as.numeric(substring(unlist(strsplit(best_params, ","))[3], 5)) ## remove space
472 minimumy = min(coord(msidata.cv.ssc)[,2]) 505 minimumy = min(coord(msidata.cv.ssc)[,2])
473 maximumy = max(coord(msidata.cv.ssc)[,2]) 506 maximumy = max(coord(msidata.cv.ssc)[,2])
474 image(msidata.cv.ssc, model = list( r = r_value, s = s_value ), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout=c(2,2)) 507 image(msidata.cv.ssc, model = list( r = r_value, s = s_value ), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout=c(1,1))
475 508
476 ## print table with summary in pdf 509 ## print table with summary in pdf
477 par(opar) 510 par(opar)
478 plot(0,type='n',axes=FALSE,ann=FALSE) 511 plot(0,type='n',axes=FALSE,ann=FALSE)
479 title(main="Summary for the different parameters\n", adj=0.5) 512 title(main="Summary for the different parameters\n", adj=0.5)
512 r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") 545 r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method")
513 plot(msidata.ssc, mode = "tstatistics", model = list("r" = c($type_cond.method_cond.ssc_r), "s" = c($type_cond.method_cond.ssc_s))) 546 plot(msidata.ssc, mode = "tstatistics", model = list("r" = c($type_cond.method_cond.ssc_r), "s" = c($type_cond.method_cond.ssc_s)))
514 547
515 ### summary table SSC 548 ### summary table SSC
516 ##############summary_table = summary(msidata.ssc) 549 ##############summary_table = summary(msidata.ssc)
550
551 ### stop if multiple values for r and s were used as input
552 tryCatch(
553 {
554
555 if (length(names(msidata.ssc@resultData))>1)
556 {
557 stop(call.=FALSE)
558 }
559 },
560 error=function(cond) {
561 ## in case user used multiple inputs for r or s stop - this is only possible in cv apply
562 message("Error during SSC training")
563 message("Possible problem: multiple values for r or s selected - this is only possible in cvapply but not for spatial shrunken centroid analysis)")
564 stop(call.=FALSE)
565 }
566 )
567
517 summary_table = summary(msidata.ssc)\$accuracy[[names(msidata.ssc@resultData)]] 568 summary_table = summary(msidata.ssc)\$accuracy[[names(msidata.ssc@resultData)]]
518 summary_table2 = round(as.numeric(summary_table), digits=2) 569 summary_table2 = round(as.numeric(summary_table), digits=2)
519 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups) 570 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups)
520 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table 571 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table
521 summary_table4 = t(summary_table3) 572 summary_table4 = t(summary_table3)
544 rm(msidata) 595 rm(msidata)
545 gc() 596 gc()
546 597
547 ssc_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, ssc_classes) 598 ssc_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, ssc_classes)
548 colnames(ssc_classes2) = c("pixel names", "x", "y","predicted condition") 599 colnames(ssc_classes2) = c("pixel names", "x", "y","predicted condition")
549 ssc_toplabels = topLabels(msidata.ssc, n=$type_cond.method_cond.ssc_analysis_cond.ssc_toplabels) 600 ssc_toplabels = topLabels(msidata.ssc, n=Inf)
550 ssc_toplabels[,6:9] <-round(ssc_toplabels[,6:9],6) 601 ssc_toplabels[,6:9] <-round(ssc_toplabels[,6:9],6)
551 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 602 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
552 write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 603 write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
553 604
554 ## image with predicted classes 605 ## image with predicted classes
611 pixel_names = gsub(" = ", "y_", pixel_names) 662 pixel_names = gsub(" = ", "y_", pixel_names)
612 x_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,2] 663 x_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,2]
613 y_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,3] 664 y_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,3]
614 predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes) 665 predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes)
615 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition") 666 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition")
616 predicted_toplabels = topLabels(prediction, n=$type_cond.predicted_toplabels) 667 predicted_toplabels = topLabels(prediction, n=Inf)
617 if (colnames(predicted_toplabels)[4] == "coefficients"){ 668 if (colnames(predicted_toplabels)[4] == "coefficients"){
618 predicted_toplabels[,4:6] <-round(predicted_toplabels[,4:6],5) 669 predicted_toplabels[,4:6] <-round(predicted_toplabels[,4:6],5)
619 670
620 }else{ 671 }else{
621 predicted_toplabels[,6:9] <-round(predicted_toplabels[,6:9],5)} 672 predicted_toplabels[,6:9] <-round(predicted_toplabels[,6:9],5)}
697 <option value="cvapply" selected="True">cvApply</option> 748 <option value="cvapply" selected="True">cvApply</option>
698 <option value="PLS_analysis">PLS-DA analysis</option> 749 <option value="PLS_analysis">PLS-DA analysis</option>
699 </param> 750 </param>
700 <when value="cvapply"> 751 <when value="cvapply">
701 <param name="plscv_comp" type="text" value="1:2" 752 <param name="plscv_comp" type="text" value="1:2"
702 label="The number of PLS-DA components" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"> 753 label="The number of PLS-DA components" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5). Minimum is 1.">
703 <expand macro="sanitizer_multiple_digits"/> 754 <expand macro="sanitizer_multiple_digits"/>
704 </param> 755 </param>
705 </when> 756 </when>
706 <when value="PLS_analysis"> 757 <when value="PLS_analysis">
707 <param name="pls_comp" type="integer" value="5" 758 <param name="pls_comp" type="integer" value="5"
708 label="The optimal number of PLS-DA components as indicated by cross-validations" help="Run cvApply first to optain optiaml number of PLS-DA components"/> 759 label="The optimal number of PLS-DA components as indicated by cross-validations (minimum is 1)" help="Run cvApply first to obtain optimal number of PLS-DA components"/>
709 <param name="pls_scale" type="boolean" label="Data scaling" truevalue="TRUE" falsevalue="FALSE"/> 760 <param name="pls_scale" type="boolean" label="Data scaling" truevalue="TRUE" falsevalue="FALSE"/>
710 <param name="pls_toplabels" type="integer" value="100" 761 <param name="pls_toplabels" type="integer" value="100"
711 label="Number of toplabels (m/z features) which should be written in tabular output"/> 762 label="Number of toplabels (m/z features) which should be written in tabular output"/>
712 </when> 763 </when>
713 </conditional> 764 </conditional>
721 <option value="opls_analysis">OPLS-DA analysis</option> 772 <option value="opls_analysis">OPLS-DA analysis</option>
722 </param> 773 </param>
723 774
724 <when value="opls_cvapply"> 775 <when value="opls_cvapply">
725 <param name="opls_cvcomp" type="text" value="1:2" 776 <param name="opls_cvcomp" type="text" value="1:2"
726 label="The number of OPLS-DA components" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"> 777 label="The number of OPLS-DA components" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5). Minimum is 1.">
727 <expand macro="sanitizer_multiple_digits"/> 778 <expand macro="sanitizer_multiple_digits"/>
728 </param> 779 </param>
729 <param name="xnew_cv" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/> 780 <!--param name="xnew_cv" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/-->
730 </when> 781 </when>
731 782
732 <when value="opls_analysis"> 783 <when value="opls_analysis">
733 <param name="opls_comp" type="integer" value="5" 784 <param name="opls_comp" type="integer" value="5"
734 label="The optimal number of OPLS-DA components as indicated by cross-validations" help="Run cvApply first to optain optiaml number of OPLS-DA components"/> 785 label="The optimal number of OPLS-DA components as indicated by cross-validations (minimum is 1)" help="Run cvApply first to obtain optimal number of OPLS-DA components"/>
735 <param name="xnew" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/> 786 <!--param name="xnew" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/-->
736 <param name="opls_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Data scaling"/> 787 <param name="opls_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Data scaling"/>
737 <param name="opls_toplabels" type="integer" value="100" 788 <!--param name="opls_toplabels" type="integer" value="100"
738 label="Number of toplabels (m/z features) which should be written in tabular output"/> 789 label="Number of toplabels (m/z features) which should be written in tabular output"/-->
739 </when> 790 </when>
740 </conditional> 791 </conditional>
741 </when> 792 </when>
742 793
743 <when value="spatialShrunkenCentroids"> 794 <when value="spatialShrunkenCentroids">
747 <option value="ssc_analysis">spatial shrunken centroids analysis</option> 798 <option value="ssc_analysis">spatial shrunken centroids analysis</option>
748 </param> 799 </param>
749 <when value="ssc_cvapply"/> 800 <when value="ssc_cvapply"/>
750 801
751 <when value="ssc_analysis"> 802 <when value="ssc_analysis">
752 <param name="ssc_toplabels" type="integer" value="100" 803 <!--param name="ssc_toplabels" type="integer" value="100"
753 label="Number of toplabels (m/z features) which should be written in tabular output"/> 804 label="Number of toplabels (m/z features) which should be written in tabular output"/-->
754 </when> 805 </when>
755 </conditional> 806 </conditional>
756 <param name="ssc_r" type="text" value="2" 807 <param name="ssc_r" type="text" value="2"
757 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"> 808 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="For cvapply multiple values are allowed (e.g. 0,1,2,3 or 2:5)">
758 <expand macro="sanitizer_multiple_digits"/> 809 <expand macro="sanitizer_multiple_digits"/>
759 </param> 810 </param>
760 <param name="ssc_s" type="text" value="2" 811 <param name="ssc_s" type="text" value="2"
761 label="The sparsity thresholding parameter by which to shrink the t-statistics (s)" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"> 812 label="The sparsity thresholding parameter by which to shrink the t-statistics (s)." help="For cvapply multiple values are allowed (e.g. 0,1,2 or 2:5)">
762 <expand macro="sanitizer_multiple_digits"/> 813 <expand macro="sanitizer_multiple_digits"/>
763 </param> 814 </param>
764 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights"> 815 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights">
765 <option value="gaussian">gaussian</option> 816 <option value="gaussian">gaussian</option>
766 <option value="adaptive" selected="True">adaptive</option> 817 <option value="adaptive" selected="True">adaptive</option>
770 </conditional> 821 </conditional>
771 </when> 822 </when>
772 823
773 <when value="prediction"> 824 <when value="prediction">
774 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/> 825 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/>
775 <param name="predicted_toplabels" type="integer" value="100" 826 <!--param name="predicted_toplabels" type="integer" value="100"
776 label="Number of toplabels (m/z features) which should be written in tabular output"/> 827 label="Number of toplabels (m/z features) which should be written in tabular output"/-->
777 <conditional name="new_y_values_cond"> 828 <conditional name="new_y_values_cond">
778 <param name="new_y_values" type="select" label="Should new response values be used"> 829 <param name="new_y_values" type="select" label="Should new response values be used">
779 <option value="no_new_response" selected="True">old response should be used</option> 830 <option value="no_new_response" selected="True">old response should be used</option>
780 <option value="new_response">load new response from tabular file</option> 831 <option value="new_response">load new response from tabular file</option>
781 </param> 832 </param>
837 <param name="class_method" value="PLS"/> 888 <param name="class_method" value="PLS"/>
838 <conditional name="analysis_cond"> 889 <conditional name="analysis_cond">
839 <param name="PLS_method" value="PLS_analysis"/> 890 <param name="PLS_method" value="PLS_analysis"/>
840 <param name="pls_comp" value="2"/> 891 <param name="pls_comp" value="2"/>
841 <param name="pls_scale" value="TRUE"/> 892 <param name="pls_scale" value="TRUE"/>
842 <param name="pls_toplabels" value="100"/> 893 <!--param name="pls_toplabels" value="100"/-->
843 </conditional> 894 </conditional>
844 </conditional> 895 </conditional>
845 </conditional> 896 </conditional>
846 <param name="output_rdata" value="True"/> 897 <param name="output_rdata" value="True"/>
847 <output name="mzfeatures" file="features_test2.tabular"/> 898 <output name="mzfeatures" file="features_test2.tabular"/>
888 <conditional name="opls_analysis_cond"> 939 <conditional name="opls_analysis_cond">
889 <param name="opls_method" value="opls_analysis"/> 940 <param name="opls_method" value="opls_analysis"/>
890 <param name="opls_comp" value="3"/> 941 <param name="opls_comp" value="3"/>
891 <param name="xnew" value="FALSE"/> 942 <param name="xnew" value="FALSE"/>
892 <param name="opls_scale" value="FALSE"/> 943 <param name="opls_scale" value="FALSE"/>
893 <param name="opls_toplabels" value="100"/> 944 <!--param name="opls_toplabels" value="100"/-->
894 </conditional> 945 </conditional>
895 </conditional> 946 </conditional>
896 </conditional> 947 </conditional>
897 <param name="output_rdata" value="True"/> 948 <param name="output_rdata" value="True"/>
898 <output name="mzfeatures" file="features_test4.tabular"/> 949 <output name="mzfeatures" file="features_test4.tabular"/>
936 <param name="column_response" value="4"/> 987 <param name="column_response" value="4"/>
937 <conditional name="method_cond"> 988 <conditional name="method_cond">
938 <param name="class_method" value="spatialShrunkenCentroids"/> 989 <param name="class_method" value="spatialShrunkenCentroids"/>
939 <conditional name="ssc_analysis_cond"> 990 <conditional name="ssc_analysis_cond">
940 <param name="ssc_method" value="ssc_analysis"/> 991 <param name="ssc_method" value="ssc_analysis"/>
941 <param name="ssc_toplabels" value="20"/> 992 <!--param name="ssc_toplabels" value="20"/-->
942 </conditional> 993 </conditional>
943 <param name="ssc_r" value="2"/> 994 <param name="ssc_r" value="2"/>
944 <param name="ssc_s" value="2"/> 995 <param name="ssc_s" value="2"/>
945 <param name="ssc_kernel_method" value="adaptive"/> 996 <param name="ssc_kernel_method" value="adaptive"/>
946 </conditional> 997 </conditional>
982 ----- 1033 -----
983 1034
984 This tool provides three different Cardinal functions for supervised classification of mass-spectrometry imaging data. 1035 This tool provides three different Cardinal functions for supervised classification of mass-spectrometry imaging data.
985 1036
986 @MSIDATA_INPUT_DESCRIPTION@ 1037 @MSIDATA_INPUT_DESCRIPTION@
1038 - NA intensities are not allowed
1039 - duplicated coordinates will be removed
1040
987 - For training: tabular file with condition and fold for each pixel: Two columns for pixel coordinates (x and y values); one column with the condition for the pixel, which will be used for classification; for the cross validation (cvapply) another column with a fold is necessary, each fold must contain pixels of all response groups and is used for cross validation. Condition and fold columns are treated as factor to perform discriminant analysis (also when numeric values are provided). 1041 - For training: tabular file with condition and fold for each pixel: Two columns for pixel coordinates (x and y values); one column with the condition for the pixel, which will be used for classification; for the cross validation (cvapply) another column with a fold is necessary, each fold must contain pixels of all response groups and is used for cross validation. Condition and fold columns are treated as factor to perform discriminant analysis (also when numeric values are provided).
988 1042
989 :: 1043 ::
990 1044
991 x_coord y_coord condition fold 1045 x_coord y_coord condition fold
1004 1058
1005 **Options** 1059 **Options**
1006 1060
1007 - PLS-DA: partial least square discriminant analysis 1061 - PLS-DA: partial least square discriminant analysis
1008 - O-PLS-DA: Orthogonal partial least squares discriminant analysis 1062 - O-PLS-DA: Orthogonal partial least squares discriminant analysis
1009 - Spatial shrunken centroids 1063 - Spatial shrunken centroids (more details in `Bemis et al. <https://doi.org/10.1074/mcp.O115.053918>`_)
1064 - training and prediction
1065
1066 - training can be done with cvapply, which uses cross validation to find the best value for s; this requires not only a condition for each spectrum but also a fold (each fold should contain spectra of all conditions)
1067 - training with the best value for s gives the top m/z features for each condition and the predicted classification group for each spectrum
1068 - training result can be saved as RData file that can be reused for prediction of further samples
1069
1070
1071 .. image:: $PATH_TO_IMAGES/classification_overview.png
1072 :width: 1000
1073 :height: 465
1074
1075
1010 1076
1011 **Tips** 1077 **Tips**
1012 1078
1013 - The classification function will only run on files with valid intensity values (NA are not allowed) 1079 - The classification function will only run on files with valid intensity values (NA are not allowed)
1014 - Only a single input file is accepted, several files have to be combined previously, for example with the msi_combine tool. 1080 - Only a single input file is accepted, several files have to be combined previously, for example with the MSI combine tool.
1015 1081
1016 1082
1017 **Output** 1083 **Output**
1018 1084
1019 - Pdf with the heatmaps and plots for the classification 1085 - Pdf with the heatmaps and plots for the classification