Mercurial > repos > galaxyp > cardinal_classification
view classification.xml @ 19:4c177985028a draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 91e77c139cb3b7c6d67727dc39140dd79355fa0c
author | galaxyp |
---|---|
date | Thu, 04 Jul 2024 13:45:03 +0000 |
parents | eddc2ae2db80 |
children |
line wrap: on
line source
<tool id="cardinal_classification" name="MSI classification" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05"> <description>spatial classification of mass spectrometry imaging data</description> <macros> <import>macros.xml</import> </macros> <expand macro="requirements"/> <command detect_errors="exit_code"> <![CDATA[ @INPUT_LINKING@ cat '${MSI_segmentation}' && Rscript '${MSI_segmentation}' ]]> </command> <configfiles> <configfile name="MSI_segmentation"><![CDATA[ ################################# load libraries and read file ######################### library(Cardinal) library(gridExtra) library(ggplot2) library(scales) @READING_MSIDATA@ msidata = as(msidata, "MSImagingExperiment") ## remove duplicated coordinates msidata <- msidata[,!duplicated(coord(msidata))] @DATA_PROPERTIES_INRAM@ ######################################## PDF ################################### ################################################################################ ################################################################################ Title = "Prediction" #if str( $type_cond.type_method) == "training": Title = "$type_cond.method_cond.class_method" #end if pdf("classificationpdf.pdf", fonts = "Times", pointsize = 12) plot(0,type='n',axes=FALSE,ann=FALSE) title(main=paste0(Title," for file: \n\n", "$infile.display_name")) ##################### I) numbers and control plots ############################# ################################################################################ ## table with values grid.table(property_df, rows= NULL) int_matrix = as.matrix(spectra(msidata)) NAcount = sum(is.na(int_matrix)) if (npeaks > 0 && NAcount==0){ opar <- par() ######################## II) Training ####################################### ############################################################################# #if str( $type_cond.type_method) == "training": print("training") ## load y response (will be needed in every training scenario) y_tabular = 
read.delim("$type_cond.annotation_file", header = $type_cond.tabular_header, stringsAsFactors = FALSE) #if str($type_cond.column_fold) == "None": y_input = y_tabular[,c($type_cond.column_x, $type_cond.column_y, $type_cond.column_response)] #else y_input = y_tabular[,c($type_cond.column_x, $type_cond.column_y, $type_cond.column_response, $type_cond.column_fold)] #end if colnames(y_input)[1:2] = c("x", "y") ## merge with coordinate information of msidata msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) colnames(msidata_coordinates)[3] = "pixel_index" merged_response = as.data.frame(merge(msidata_coordinates, y_input, by=c("x", "y"), all.x=TRUE)) merged_response[is.na(merged_response)] = "NA" merged_response = merged_response[order(merged_response\$pixel_index),] conditions = as.factor(merged_response[,4]) y_vector = conditions ## colours selection: #if str($colour_conditional.colour_type) == "manual_colour" #set $color_string = ','.join(['"%s"' % $color.annotation_color for $color in $colour_conditional.colours]) colourvector = c($color_string) #elif str($colour_conditional.colour_type) == "colourpalette" number_levels = (length(levels(conditions))) colourvector = noquote($colour_conditional.palettes)(number_levels) #end if ## plot of y vector position_df = as.data.frame(cbind(coord(msidata)[,1:2], conditions)) y_plot = ggplot(position_df, aes(x=x, y=y, fill=conditions))+ geom_tile() + coord_fixed()+ ggtitle("Distribution of the conditions")+ theme_bw()+ theme( plot.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank())+ theme(text=element_text(family="ArialMT", face="bold", size=15))+ theme(legend.position="bottom",legend.direction="vertical")+ guides(fill=guide_legend(ncol=4,byrow=TRUE))+ scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector) coord_labels = aggregate(cbind(x,y)~conditions, data=position_df, mean, na.rm=TRUE, na.action="na.pass") ##coord_labels\$file_number = 
gsub( "_.*$", "", coord_labels\$conditions) print(y_plot) ## plot of folds #if str($type_cond.column_fold) != "None": fold_vector = as.factor(merged_response[,5]) position_df = as.data.frame(cbind(coord(msidata)[,1:2], fold_vector)) fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+ geom_tile() + coord_fixed()+ ggtitle("Distribution of the fold variable")+ theme_bw()+ theme( plot.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank())+ theme(text=element_text(family="ArialMT", face="bold", size=15))+ theme(legend.position="bottom",legend.direction="vertical")+ guides(fill=guide_legend(ncol=4,byrow=TRUE)) coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector) print(fold_plot) #end if ######################## PLS ############################# #if str( $type_cond.method_cond.class_method) == "PLS": print("PLS") ######################## PLS - CV ############################# #if str( $type_cond.method_cond.analysis_cond.PLS_method) == "cvapply": print("PLS cv") ## set variables for components and number of response groups components = c($type_cond.method_cond.analysis_cond.plscv_comp) number_groups = length(levels(y_vector)) ## PLS-cvApply: msidata.cv.pls <- crossValidate(msidata, .y = y_vector, .fold = fold_vector, .fun = "PLS", ncomp = components) ## remove msidata to clean up RAM space rm(msidata) gc() ## create new summary table with cv results results_list <- NULL for (i in seq_along(components)) { ## extract accuracy, sensitivity, and specificity for the current i accuracy <- round(as.data.frame(msidata.cv.pls@resultData@listData[[i]][["accuracy"]]), digits=2) sensitivity <- round(as.data.frame(msidata.cv.pls@resultData@listData[[i]][["sensitivity"]]), digits=2) specificity <- round(as.data.frame(msidata.cv.pls@resultData@listData[[i]][["specificity"]]), digits=2) ## combine 
accuracy, sensitivity, and specificity into one data frame result_df <- cbind(folds = rownames(accuracy), ncomp = i, accuracy, sensitivity, specificity) colnames(result_df) <- c("folds", "ncomp", "accuracy", "sensitivity", "specificity") rownames(result_df) <- NULL ## add column names with ncomp as first row to each dataframe col_names_row <- data.frame(folds = "folds", ncomp = paste0("ncomp", i), accuracy = "accuracy", sensitivity = "sensitivity", specificity = "specificity") result_df <- rbind(col_names_row, result_df) results_list[[i]] <- result_df } ## combine all data frames in the list into one data frame results_df <- do.call(rbind, results_list) summary_df <- results_df ## new table and plot of accuracies over all components summary.cv.pls = as.data.frame(summary(msidata.cv.pls)) plot(0,type='n',axes=FALSE,ann=FALSE) summary.cv.pls.round <- round(summary.cv.pls, digits=2) grid.table(summary.cv.pls.round, rows=NULL) accuracy_plot = ggplot(summary.cv.pls, aes(x = ncomp, y = Accuracy)) + geom_point(color = "blue", size = 3) + # Add points geom_line() + theme_bw() print(accuracy_plot) ## print table with summary in pdf par(opar) plot(0,type='n',axes=FALSE,ann=FALSE) title(main="Summary for the different components\n", adj=0.5) ## 20 rows fits in one page: if (nrow(summary_df)<=20){ grid.table(summary_df, rows= NULL) }else{ grid.table(summary_df[1:20,], rows= NULL) mincount = 21 maxcount = 40 for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){ plot(0,type='n',axes=FALSE,ann=FALSE) if (maxcount <= nrow(summary_df)){ grid.table(summary_df[mincount:maxcount,], rows= NULL) mincount = mincount+20 maxcount = maxcount+20 }else{### stop last page with last sample otherwise NA in table grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} } } ## optional output as .RData #if $output_rdata: save(msidata.cv.pls, file="$classification_rdata") #end if ######################## PLS - analysis ########################### #elif str( 
$type_cond.method_cond.analysis_cond.PLS_method) == "PLS_analysis": print("PLS analysis") ## set variables for components and number of response groups component = c($type_cond.method_cond.analysis_cond.pls_comp) number_groups = length(levels(y_vector)) ### stop if multiple values for PLS components are selected what sets component to 0 tryCatch( { if (component==0) { stop(call.=FALSE) } }, error=function(cond) { ## in case user used multiple inputs for component - this is only possible in cv apply message("Error during PLS training") message("Possible problems: Multiple values for component were selected - this is only possible in cvapply but not for PLS analysis or component was set to 0 but minimum for component is 1)") stop(call.=FALSE) } ) ### pls analysis and coefficients plot msidata.pls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.analysis_cond.pls_scale) plot(msidata.pls, main="PLS coefficients per m/z", col=colourvector) ## create new summary table summary_df = as.data.frame(summary(msidata.pls)) colnames(summary_df) = c("Number of Components", "Accuracy", "Sensitivity", "Specificity") summary_df = round(summary_df, digits = 2) plot(0,type='n',axes=FALSE,ann=FALSE) grid.table(summary_df, rows= NULL) ## Yweights plot: represent the importance of each response variable in predicting each component #if $type_cond.method_cond.analysis_cond.PLS_Yweights == "TRUE": Yweights = as.data.frame(msidata.pls@resultData@listData[[1]][["Yweights"]]) Yweights = round(Yweights, digits = 4) Yweights.class <- cbind("class" = rownames(Yweights), Yweights) plot(0,type='n',axes=FALSE,ann=FALSE) text(x = 0.95, y = 1, "Yweights", cex = 2, font = 2) grid.table(Yweights.class, rows= NULL) #end if coefficient_plot = plot(msidata.pls, values="coefficients", lwd=2, main = "PLS coefficients per m/z") print(coefficient_plot) ## m/z and pixel information output pls_classes = data.frame(msidata.pls@resultData@listData[[1]][["class"]]) ## pixel names and 
coordinates x_coords = msidata_coordinates@listData[["x"]] y_coords = msidata_coordinates@listData[["y"]] pixel_names = paste0("xy_", x_coords, "_", y_coords) ## remove msidata to clean up RAM space rm(msidata) gc() pls_classes2 = data.frame(pixel_names, x_coords, y_coords, pls_classes, y_vector) colnames(pls_classes2) = c("pixel_name", "x", "y","predicted_class", "annotated_class") pls_classes2\$correct <- ifelse(pls_classes2\$predicted_class==pls_classes2\$annotated_class, T, F) write.table(pls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") correctness = round(sum(pls_classes2\$correct)/length(pls_classes2\$correct)*100,2) ## replace topFeatures table with coefficients table coefficients.df = as.data.frame(msidata.pls@resultData@listData[[1]][["coefficients"]]) row_names <- msidata.pls@featureData@mz coefficients.df.rownames <- cbind("mz" = row_names, coefficients.df) write.table(coefficients.df.rownames, file = "$coefficients", quote = FALSE, sep = "\t", row.names = FALSE) ## add loadings and weights table loadings.df = as.data.frame(msidata.pls@resultData@listData[[1]][["loadings"]]) loadings.df <- cbind("mz" = row_names, loadings.df) new_names <- paste0("loadings_", names(loadings.df)[-1]) names(loadings.df)[-1] <- new_names weights.df = as.data.frame(msidata.pls@resultData@listData[[1]][["weights"]]) weights.df <- cbind("mz" = row_names, weights.df) new_names <- paste0("weights_", names(weights.df)[-1]) names(weights.df)[-1] <- new_names ## combine loading and weights table merged.load.wei = merge(loadings.df, weights.df, by = "mz") write.table(merged.load.wei, file = "$loadings_weights", quote = FALSE, sep = "\t", row.names = FALSE) ## image with predicted classes prediction_df = as.data.frame(cbind(coord(msidata.pls)[,1:2], pls_classes)) colnames(prediction_df) = c("x", "y", "predicted_classes") prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ geom_tile() + coord_fixed()+ 
ggtitle("Predicted condition for each pixel")+ theme_bw()+ theme( plot.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank())+ theme(text=element_text(family="ArialMT", face="bold", size=15))+ theme(legend.position="bottom",legend.direction="vertical")+ guides(fill=guide_legend(ncol=4,byrow=TRUE))+ scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector) coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) print(prediction_plot) ## correctness plot correctness_plot = ggplot(pls_classes2, aes(x=x, y=y, fill=correct))+ geom_tile() + coord_fixed()+ ggtitle(paste0("Correctness of classification: ", correctness, " %"))+ scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+ theme_bw()+ theme( plot.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank())+ theme(text=element_text(family="ArialMT", face="bold", size=15))+ theme(legend.position="bottom",legend.direction="vertical")+ guides(fill=guide_legend(ncol=2,byrow=TRUE)) coord_labels = aggregate(cbind(x,y)~correct, data=pls_classes2, mean, na.rm=TRUE, na.action="na.pass") ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) print(correctness_plot) ### optional output as .RData #if $output_rdata: save(msidata.pls, file="$classification_rdata") #end if #end if ######################## OPLS ############################# #elif str( $type_cond.method_cond.class_method) == "OPLS": print("OPLS") ######################## OPLS -CV ############################# #if str( $type_cond.method_cond.opls_analysis_cond.opls_method) == "opls_cvapply": print("OPLS cv") ## set variables for components and number of response groups components = c($type_cond.method_cond.opls_analysis_cond.opls_cvcomp) number_groups = length(levels(y_vector)) ## 
OPLS-cvApply: msidata.cv.opls <- crossValidate(msidata, .y = y_vector, .fold = fold_vector, .fun = "OPLS", ncomp = components) ## remove msidata to clean up RAM space rm(msidata) gc() ## new table with cv results to replace the old summary table results_list <- NULL for (i in seq_along(components)) { ## extract accuracy, sensitivity, and specificity for the current i accuracy <- round(as.data.frame(msidata.cv.opls@resultData@listData[[i]][["accuracy"]]), digits=2) sensitivity <- round(as.data.frame(msidata.cv.opls@resultData@listData[[i]][["sensitivity"]]), digits=2) specificity <- round(as.data.frame(msidata.cv.opls@resultData@listData[[i]][["specificity"]]), digits=2) ## combine accuracy, sensitivity, and specificity into one data frame result_df <- cbind(folds = rownames(accuracy), ncomp = i, accuracy, sensitivity, specificity) colnames(result_df) <- c("folds", "ncomp", "accuracy", "sensitivity", "specificity") rownames(result_df) <- NULL ## add column names with ncomp as first row to each dataframe col_names_row <- data.frame(folds = "folds", ncomp = paste0("ncomp", i), accuracy = "accuracy", sensitivity = "sensitivity", specificity = "specificity") result_df <- rbind(col_names_row, result_df) results_list[[i]] <- result_df } ## combine all data frames in the list into one data frame results_df <- do.call(rbind, results_list) summary_df <- results_df ## new table and plot of accuracies over all components summary.cv.opls = as.data.frame(summary(msidata.cv.opls)) ## table with values plot(0,type='n',axes=FALSE,ann=FALSE) summary.cv.opls.round <- round(summary.cv.opls, digits=2) grid.table(summary.cv.opls.round, rows=NULL) accuracy_plot = ggplot(summary.cv.opls, aes(x = ncomp, y = Accuracy)) + geom_point(color = "blue", size = 3) + # Add points geom_line() + theme_bw() print(accuracy_plot) ## print table with summary in pdf par(opar) plot(0,type='n',axes=FALSE,ann=FALSE) title(main="Summary for the different components\n", adj=0.5) ## 20 rows fits in one page: if 
(nrow(summary_df)<=20){ grid.table(summary_df, rows= NULL) }else{ grid.table(summary_df[1:20,], rows= NULL) mincount = 21 maxcount = 40 for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){ plot(0,type='n',axes=FALSE,ann=FALSE) if (maxcount <= nrow(summary_df)){ grid.table(summary_df[mincount:maxcount,], rows= NULL) mincount = mincount+20 maxcount = maxcount+20 }else{### stop last page with last sample otherwise NA in table grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} } } ## optional output as .RData #if $output_rdata: save(msidata.cv.opls, file="$classification_rdata") #end if ######################## OPLS -analysis ########################### #elif str( $type_cond.method_cond.opls_analysis_cond.opls_method) == "opls_analysis": print("OPLS analysis") ## set variables for components and number of response groups component = c($type_cond.method_cond.opls_analysis_cond.opls_comp) number_groups = length(levels(y_vector)) ### stop if multiple values for OPLS components are selected what sets component to 0 tryCatch( { if (component==0) { stop(call.=FALSE) } }, error=function(cond) { ## in case user used multiple inputs for component - this is only possible in cv apply message("Error during OPLS training") message("Possible problems: Multiple values for component were selected - this is only possible in cvapply but not for OPLS analysis or component was set to 0 but minimum for component is 1)") stop(call.=FALSE) } ) ### opls analysis and coefficients plot msidata.opls <- OPLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.opls_analysis_cond.opls_scale) plot(msidata.opls, main="OPLS coefficients per m/z", col=colourvector) ## create new summary table summary_df = as.data.frame(summary(msidata.opls)) colnames(summary_df) = c("Number of Components", "Accuracy", "Sensitivity", "Specificity") summary_df = round(summary_df, digits = 2) plot(0,type='n',axes=FALSE,ann=FALSE) grid.table(summary_df, rows= NULL) #if 
$type_cond.method_cond.opls_analysis_cond.OPLS_Yweights == "TRUE": ## Yweights plot: represent the importance of each response variable in predicting each component Yweights = as.data.frame(msidata.opls@resultData@listData[[1]][["Yweights"]]) Yweights = round(Yweights, digits = 4) Yweights.class <- cbind("class" = rownames(Yweights), Yweights) plot(0,type='n',axes=FALSE,ann=FALSE) text(x = 0.95, y = 1, "Yweights", cex = 2, font = 2) grid.table(Yweights.class, rows= NULL) #end if coefficient_plot = plot(msidata.opls, values="coefficients", lwd=2, main = "OPLS coefficients per m/z") print(coefficient_plot) ## m/z and pixel information output opls_classes = data.frame(msidata.opls@resultData@listData[[1]][["class"]]) ## pixel names and coordinates x_coords = msidata_coordinates@listData[["x"]] y_coords = msidata_coordinates@listData[["y"]] pixel_names = paste0("xy_", x_coords, "_", y_coords) opls_classes2 = data.frame(pixel_names, x_coords, y_coords, opls_classes, y_vector) colnames(opls_classes2) = c("pixel names", "x", "y","predicted_class", "annotated_class") opls_classes2\$correct <- ifelse(opls_classes2\$predicted_class == opls_classes2\$annotated_class, T, F) write.table(opls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") correctness = round(sum(opls_classes2\$correct)/length(opls_classes2\$correct)*100,2) ## remove msidata to clean up RAM space rm(msidata) gc() ## replace topFeatures table with coefficients table coefficients.df = as.data.frame(msidata.opls@resultData@listData[[1]][["coefficients"]]) row_names <- msidata.opls@featureData@mz coefficients.df.rownames <- cbind("mz" = row_names, coefficients.df) write.table(coefficients.df.rownames, file = "$coefficients", quote = FALSE, sep = "\t", row.names = FALSE) ## add loadings and weights table loadings.df = as.data.frame(msidata.opls@resultData@listData[[1]][["loadings"]]) loadings.df <- cbind("mz" = row_names, loadings.df) new_names <- paste0("loadings_", 
names(loadings.df)[-1]) names(loadings.df)[-1] <- new_names weights.df = as.data.frame(msidata.opls@resultData@listData[[1]][["weights"]]) weights.df <- cbind("mz" = row_names, weights.df) new_names <- paste0("weights_", names(weights.df)[-1]) names(weights.df)[-1] <- new_names ## combine loading and weights table merged.load.wei = merge(loadings.df, weights.df, by = "mz") write.table(merged.load.wei, file = "$loadings_weights", quote = FALSE, sep = "\t", row.names = FALSE) ## image with predicted classes prediction_df = as.data.frame(cbind(coord(msidata.opls)[,1:2], opls_classes)) colnames(prediction_df) = c("x", "y", "predicted_classes") prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ geom_tile() + coord_fixed()+ ggtitle("Predicted condition for each pixel")+ theme_bw()+ theme( plot.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank())+ theme(text=element_text(family="ArialMT", face="bold", size=15))+ theme(legend.position="bottom",legend.direction="vertical")+ guides(fill=guide_legend(ncol=4,byrow=TRUE))+ scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector) coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) print(prediction_plot) ## correctness plot correctness_plot = ggplot(opls_classes2, aes(x=x, y=y, fill=correct))+ geom_tile() + coord_fixed()+ ggtitle(paste0("Correctness of classification: ", correctness, " %"))+ scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+ theme_bw()+ theme( plot.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank())+ theme(text=element_text(family="ArialMT", face="bold", size=15))+ theme(legend.position="bottom",legend.direction="vertical")+ guides(fill=guide_legend(ncol=2,byrow=TRUE)) coord_labels = 
aggregate(cbind(x,y)~correct, data=opls_classes2, mean, na.rm=TRUE, na.action="na.pass") ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) print(correctness_plot) ## optional output as .RData #if $output_rdata: save(msidata.opls, file="$classification_rdata") #end if #end if ######################## SSC ############################# #elif str( $type_cond.method_cond.class_method) == "spatialShrunkenCentroids": print("SSC") ######################## SSC - CV ############################# #if str( $type_cond.method_cond.ssc_analysis_cond.ssc_method) == "ssc_cvapply": print("SSC cv") ## set variables for components and number of response groups number_groups = length(levels(y_vector)) ## SSC-cvApply: msidata.cv.ssc <- crossValidate(msidata, .y = y_vector,.fold = fold_vector,.fun = "spatialShrunkenCentroids", r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") ## remove msidata to clean up RAM space rm(msidata) gc() ## new table and plot of accuracies over all components summary.cv.ssc = as.data.frame(summary(msidata.cv.ssc)) summary.cv.ssc.round <- round(summary.cv.ssc, digits=2) par(opar) plot(0,type='n',axes=FALSE,ann=FALSE) title(main="Summary for the different parameters\n", adj=0.5) ## 20 rows fits in one page: if (nrow(summary.cv.ssc.round)<=20){ grid.table(summary.cv.ssc.round, rows= NULL) }else{ grid.table(summary.cv.ssc.round[1:20,], rows= NULL) mincount = 21 maxcount = 40 for (count20 in 1:(ceiling(nrow(summary.cv.ssc.round)/20)-1)){ plot(0,type='n',axes=FALSE,ann=FALSE) if (maxcount <= nrow(summary.cv.ssc.round)){ grid.table(summary.cv.ssc.round[mincount:maxcount,], rows= NULL) mincount = mincount+20 maxcount = maxcount+20 }else{### stop last page with last sample otherwise NA in table grid.table(summary.cv.ssc.round[mincount:nrow(summary.cv.ssc.round),], rows= NULL)} } } ## new accuracy plots #if 
$type_cond.method_cond.ssc_analysis_cond.ssc_cv_accuracy_plot == "TRUE": accuracy_plot = ggplot(summary.cv.ssc, aes(x = s, y = Accuracy)) + geom_point(color = "blue", size = 3) + # Add points geom_line() + theme_bw() + facet_wrap(~ r) print(accuracy_plot) ## or as alternative accuracy plot for each r value on own page: #elif $type_cond.method_cond.ssc_analysis_cond.ssc_cv_accuracy_plot == "FALSE": unique_r_values <- unique(summary.cv.ssc\$r) for (r_value in unique_r_values) { ## Create a subset for the current value of r plot_data <- subset(summary.cv.ssc, r == r_value) ## Create the accuracy plot for the current value of r accuracy_plot <- ggplot(plot_data, aes(x = s, y = Accuracy)) + geom_point(color = "blue", size = 3) + # Add points geom_line() + theme_bw() + ggtitle(paste("Plot for r =", r_value)) + # Add a title theme(plot.title = element_text(hjust = 0.5)) # Center the title print(accuracy_plot) } #end if ## table with cv values per fold group for each combination of r and s r_s_df = as.data.frame(msidata.cv.ssc@modelData@listData) r_s_df\$parameter = paste0("r=", r_s_df\$r, " and s=", r_s_df\$s) iteration = seq_along(r_s_df\$parameter) results_list <- NULL for (i in iteration) { ## extract accuracy, sensitivity, and specificity for the current i accuracy <- round(as.data.frame(msidata.cv.ssc@resultData@listData[[i]][["accuracy"]]), digits=2) sensitivity <- round(as.data.frame(msidata.cv.ssc@resultData@listData[[i]][["sensitivity"]]), digits=2) specificity <- round(as.data.frame(msidata.cv.ssc@resultData@listData[[i]][["specificity"]]), digits=2) ## combine accuracy, sensitivity, and specificity into one data frame result_df <- cbind(folds = rownames(accuracy), parameter = r_s_df\$parameter[i], accuracy, sensitivity, specificity) colnames(result_df) <- c("folds", "parameter", "accuracy", "sensitivity", "specificity") rownames(result_df) <- NULL ## add column names as first row to each dataframe col_names_row <- data.frame(folds = "folds", parameter = 
"parameter", accuracy = "accuracy", sensitivity = "sensitivity", specificity = "specificity") result_df <- rbind(col_names_row, result_df) results_list[[i]] <- result_df } ## combine all data frames in the list into one data frame results_df <- do.call(rbind, results_list) summary_df <- results_df par(opar) plot(0,type='n',axes=FALSE,ann=FALSE) title(main="More advanced folds output table: \n Summary for each fold\n", adj=0.5) ## 20 rows fits in one page: if (nrow(summary_df)<=20){ grid.table(summary_df, rows= NULL) }else{ grid.table(summary_df[1:20,], rows= NULL) mincount = 21 maxcount = 40 for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){ plot(0,type='n',axes=FALSE,ann=FALSE) if (maxcount <= nrow(summary_df)){ grid.table(summary_df[mincount:maxcount,], rows= NULL) mincount = mincount+20 maxcount = maxcount+20 }else{### stop last page with last sample otherwise NA in table grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} } } ## new code to extract best r and s values max_accuracy_index <- which.max(summary.cv.ssc\$Accuracy) ## extract the corresponding values of "r" and "s" highest_accuracy_r <- summary.cv.ssc\$r[max_accuracy_index] highest_accuracy_s <- summary.cv.ssc\$s[max_accuracy_index] #if $type_cond.method_cond.ssc_analysis_cond.write_best_params: write.table(highest_accuracy_r, file="$best_r", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") write.table(highest_accuracy_s, file="$best_s", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") #end if ## optional output as .RData #if $output_rdata: save(msidata.cv.ssc, file="$classification_rdata") #end if ######################## SSC -analysis ########################### #elif str( $type_cond.method_cond.ssc_analysis_cond.ssc_method) == "ssc_analysis": print("SSC analysis") ## set variables for components and number of response groups number_groups = length(levels(y_vector)) ## SSC analysis and plot msidata.ssc <- spatialShrunkenCentroids(msidata, y = y_vector, r = 
c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") print(plot(msidata.ssc, values = "statistic", model = list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)), col=colourvector, lwd=2)) ### stop if multiple values for r and s were used as input tryCatch( { if (length(names(msidata.ssc@resultData))>1) { stop(call.=FALSE) } }, error=function(cond) { ## in case user used multiple inputs for r or s stop - this is only possible in cv apply message("Error during SSC training") message("Possible problem: multiple values for r or s selected - this is only possible in cvapply but not for spatial shrunken centroid analysis)") stop(call.=FALSE) } ) summary_df = as.data.frame(summary(msidata.ssc)) summary_df = round(summary_df, digits=3) colnames(summary_df) = c("Radius r", "Shrinkage s", "Features/Class", "Accuracy", "Sensitivity", "Specificity") plot(0,type='n',axes=FALSE,ann=FALSE) grid.table(summary_df, rows= NULL) ## image of the best m/z minimumy = min(coord(msidata)[,2]) maximumy = max(coord(msidata)[,2]) print(image(msidata, mz = topFeatures(msidata.ssc)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), main="best m/z heatmap")) ## m/z and pixel information output x_coords = msidata_coordinates@listData[["x"]] y_coords = msidata_coordinates@listData[["y"]] pixel_names = paste0("xy_", x_coords, "_", y_coords) ## remove msidata to clean up RAM space rm(msidata) gc() ## toplabel (m/z features output) ssc_toplabels = topFeatures(msidata.ssc, n=$type_cond.method_cond.ssc_toplabels) ssc_toplabels@listData[["centers"]] = round (ssc_toplabels@listData[["centers"]], digits = 6) ssc_toplabels@listData[["statistic"]] = round (ssc_toplabels@listData[["statistic"]], digits = 6) write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 
print(image(msidata.ssc, model=list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector, values="class", layout=c(1,1), main="Class Prediction")) print(image(msidata.ssc, model=list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector, values="probability", layout=c(1,1), main="Class Probabilities")) ## pixel output with correctness ssc_classes = data.frame(msidata.ssc@resultData@listData[[1]][["class"]]) colnames(ssc_classes) = "predicted_class" ssc_classes\$predicted_class = ifelse(is.na(ssc_classes\$predicted_class), "NA", as.character(ssc_classes\$predicted_class)) ssc_probabilities = data.frame(msidata.ssc@resultData@listData[[1]][["probability"]]) ssc_classes2 = data.frame(pixel_names, x_coords, y_coords, ssc_classes, ssc_probabilities, y_vector) colnames(ssc_classes2) = c("pixel_names", "x", "y","predicted_classes", levels(msidata.ssc@resultData@listData[[1]][["class"]]), "annotated_class") ssc_classes2\$correct<- ifelse(ssc_classes2\$predicted_classes==ssc_classes2\$annotated_class, T, F) correctness = round(sum(ssc_classes2\$correct, na.rm = TRUE)/length(ssc_classes2\$correct)*100,2) write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") correctness_plot = ggplot(ssc_classes2, aes(x=x, y=y, fill=correct))+ geom_tile() + coord_fixed()+ ggtitle(paste0("Correctness of classification: ", correctness, " %"))+ scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+ theme_bw()+ theme( plot.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank())+ theme(text=element_text(family="ArialMT", face="bold", size=15))+ theme(legend.position="bottom",legend.direction="vertical")+ guides(fill=guide_legend(ncol=2,byrow=TRUE)) coord_labels = aggregate(cbind(x,y)~correct, 
data=ssc_classes2, mean, na.rm=TRUE, na.action="na.pass")
##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes)
print(correctness_plot)

## optional output as .RData
#if $output_rdata:
save(msidata.ssc, file="$classification_rdata")
#end if
#end if
#end if

######################## II) Prediction #############################
#############################################################################
#elif str($type_cond.type_method) == "prediction":
## Apply a model from a previous training run to the current dataset.
## If an annotation file is supplied, the prediction is also scored against it.
print("prediction")
## NOTE(review): loadRData is not defined in this part of the file - presumably supplied by the imported macros
training_data = loadRData("$type_cond.training_result")

#if str($type_cond.new_y_values_cond.new_y_values) == "new_response":
print("new response")
new_y_tabular = read.delim("$type_cond.new_y_values_cond.new_response_file", header = $type_cond.new_y_values_cond.new_tabular_header, stringsAsFactors = FALSE)
new_y_input = new_y_tabular[,c($type_cond.new_y_values_cond.column_new_x, $type_cond.new_y_values_cond.column_new_y, $type_cond.new_y_values_cond.column_new_response)]
colnames(new_y_input)[1:2] = c("x", "y")

## merge with coordinate information of msidata
## seq_len() yields the same 1..n pixel index as 1:n but stays empty for n = 0
msidata_coordinates = cbind(coord(msidata)[,1:2], seq_len(ncol(msidata)))
colnames(msidata_coordinates)[3] = "pixel_index"
## left join keeps every pixel; pixels without annotation get the string "NA" as class
merged_response = as.data.frame(merge(msidata_coordinates, new_y_input, by=c("x", "y"), all.x=TRUE))
merged_response[is.na(merged_response)] = "NA"
## merge() reorders rows, so restore the original pixel order before building the response
merged_response = merged_response[order(merged_response\$pixel_index),]
new_y_vector = as.factor(merged_response[,4])
prediction = predict(training_data, msidata, newy = new_y_vector)

##numbers of levels for colour selection
number_levels = length(levels(new_y_vector))

##new summary table
##if SSC classification, summary table has more results:
## classification_type lives inside the classification_type_cond conditional, hence the full path
#if str($type_cond.classification_type_cond.classification_type) == "SSC_classifier":
print("SSC classification summary")
summary_df = as.data.frame(summary(prediction))
summary_df = round(summary_df, digits=3)
colnames(summary_df) = c("Radius r", "Shrinkage s", "Features/Class", "Accuracy", "Sensitivity", "Specificity")
plot(0,type='n',axes=FALSE,ann=FALSE)
grid.table(summary_df, rows= NULL)
## else PLS or OPLS classifier:
#else
print("PLS/OPLS classifier")
summary_df = as.data.frame(summary(prediction))
colnames(summary_df) = c("Component", "Accuracy", "Sensitivity", "Specificity")
summary_df = round(summary_df, digits = 2)
plot(0,type='n',axes=FALSE,ann=FALSE)
grid.table(summary_df, rows= NULL)
#end if

##else for prediction without a new annotation (no calculation of accuracy):
#else
prediction = predict(training_data, msidata)
number_levels = length(levels(training_data@resultData@listData[[1]][["class"]]))
#end if

## colours selection:
#if str($colour_conditional.colour_type) == "manual_colour"
#set $color_string = ','.join(['"%s"' % $color.annotation_color for $color in $colour_conditional.colours])
colourvector = c($color_string)
#elif str($colour_conditional.colour_type) == "colourpalette"
colourvector = noquote($colour_conditional.palettes)(number_levels)
#end if

## m/z and pixel information output
predicted_classes = data.frame(prediction@resultData@listData[[1]][["class"]])
msidata_coordinates = cbind(coord(msidata)[,1:2], seq_len(ncol(msidata)))
colnames(msidata_coordinates)[3] = "pixel_index"
x_coords = msidata_coordinates@listData[["x"]]
y_coords = msidata_coordinates@listData[["y"]]
pixel_names = paste0("xy_", x_coords, "_", y_coords)
predicted_classes2 = data.frame(pixel_names, x_coords, y_coords, predicted_classes)
colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition")

##topFeatures only available for SSC; for PLS and OPLS coefficients loading and weights are provided
#if str($type_cond.classification_type_cond.classification_type) == "SSC_classifier":
predicted_toplabels = topFeatures(prediction, n=$type_cond.classification_type_cond.predicted_toplabels)
predicted_toplabels <- as.data.frame(predicted_toplabels)
predicted_toplabels[,6:7] <-round(predicted_toplabels[,6:7], digits = 5)
write.table(predicted_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
#else
## if PLS or OPLS classifier, coefficients, loadings, and weights instead of topFeatures
coefficients.df = as.data.frame(prediction@resultData@listData[[1]][["coefficients"]])
row_names <- prediction@featureData@mz
coefficients.df <- cbind("mz" = row_names, coefficients.df)
write.table(coefficients.df, file = "$coefficients", quote = FALSE, sep = "\t", row.names = FALSE)

## add loadings and weights table
loadings.df = as.data.frame(prediction@resultData@listData[[1]][["loadings"]])
loadings.df <- cbind("mz" = row_names, loadings.df)
new_names <- paste0("loadings_", names(loadings.df)[-1])
names(loadings.df)[-1] <- new_names
weights.df = as.data.frame(prediction@resultData@listData[[1]][["weights"]])
weights.df <- cbind("mz" = row_names, weights.df)
new_names <- paste0("weights_", names(weights.df)[-1])
names(weights.df)[-1] <- new_names

## combine loading and weights table
merged.load.wei = merge(loadings.df, weights.df, by = "mz")
write.table(merged.load.wei, file = "$loadings_weights", quote = FALSE, sep = "\t", row.names = FALSE)
#end if

##predicted classes
prediction_df = as.data.frame(cbind(coord(prediction)[,1:2], predicted_classes))
colnames(prediction_df) = c("x", "y", "predicted_classes")

#if str($type_cond.classification_type_cond.classification_type) == "SSC_classifier":
## this seems to work only for SSC, therefore overwrite tables
predicted_probabilities = data.frame(prediction@resultData@listData[[1]][["probability"]])
predicted_classes2 = data.frame(pixel_names, x_coords, y_coords, predicted_classes, predicted_probabilities)
colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition", levels(prediction@resultData@listData[[1]][["class"]]))

## also image modes are specific to SSC
## head() instead of [1:5,]: no rows of NA are printed when there are fewer than five pixels
print(head(predicted_classes2, 5))
print(image(prediction, values="class", layout=c(1,1), main="Class Prediction", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector))
print(image(prediction, values="probability", layout=c(1,1), main="Class Probabilities",ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector))
#else
## PLS/OPLS: draw the predicted class per pixel as a tile plot
prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+
    geom_tile()+
    coord_fixed()+
    ggtitle("Predicted condition for each spectrum")+
    theme_bw()+
    theme(
        plot.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())+
    theme(text=element_text(family="ArialMT", face="bold", size=15))+
    theme(legend.position="bottom", legend.direction="vertical")+
    guides(fill=guide_legend(ncol=4, byrow=TRUE))+
    scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector)
## NOTE(review): coord_labels is only referenced by the commented-out line below
coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass")
##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes)
print(prediction_plot)
#end if

#if str($type_cond.new_y_values_cond.new_y_values) == "new_response":
## image with right and wrong classes:
comparison_df = as.data.frame(cbind(prediction_df, new_y_vector))
colnames(comparison_df) = c("x", "y", "predicted_class", "annotated_class")
comparison_df\$predicted_class = ifelse(is.na(comparison_df\$predicted_class), "NA", as.character(comparison_df\$predicted_class))
## TRUE/FALSE instead of T/F: the short forms are ordinary variables that can be reassigned
comparison_df\$correct <- ifelse(comparison_df\$predicted_class==comparison_df\$annotated_class, TRUE, FALSE)
correctness = round(sum(comparison_df\$correct, na.rm = TRUE)/length(comparison_df\$correct)*100,2)
correctness_plot = ggplot(comparison_df, aes(x=x, y=y, fill=correct))+
    geom_tile()+
    scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+
    coord_fixed()+
    ggtitle(paste0("Correctness of classification: ", correctness, " %"))+
    theme_bw()+
    theme(text=element_text(family="ArialMT", face="bold", size=15))+
    theme(legend.position="bottom",legend.direction="vertical")+
    guides(fill=guide_legend(ncol=2,byrow=TRUE))
print(correctness_plot)
#end if

## pixel output
#if str($type_cond.new_y_values_cond.new_y_values) == "new_response":
print("new response output")
## with annotations: predicted and annotated class plus correctness per pixel
write.table(comparison_df, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
#else
write.table(predicted_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
#end if

## optional output as .RData
#if $output_rdata:
## the prediction result is saved under the object name msidata
msidata = prediction
save(msidata, file="$classification_rdata")
#end if
#end if
dev.off()
}else{
plot.new()
text(0.5, 0.5, "Inputfile has no intensities > 0 \n or contains NA values.", cex = 1.5)
print("Inputfile has no intensities > 0 or contains NA values")
dev.off()
}
    ]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="reading_msidata"/>
        <!-- top-level switch: train a new classifier or apply a saved one -->
        <conditional name="type_cond">
            <param name="type_method" type="select" label="Analysis step to perform">
                <option value="training" selected="True">training</option>
                <option value="prediction">prediction</option>
            </param>
            <when value="training">
                <!-- pixel annotation table: coordinates, response and (for cvapply) fold column -->
                <param name="annotation_file" type="data" format="tabular" label="Load tabular file with pixel coordinates and their classes" help="Three or four columns: x values, y values, response values, optionally fold values"/>
                <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/>
                <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/>
                <param name="column_response" data_ref="annotation_file" label="Column with response (condition) values" type="data_column" help="This is the condition (pixel group) which will be classified"/>
                <param name="column_fold" data_ref="annotation_file" optional="True" label="Column with fold values - only necessary for cvapply" type="data_column" help="Each fold must contain pixels of all response groups and is used for cross validation"/>
                <param name="tabular_header" type="boolean" label="Tabular files contain a header line" truevalue="TRUE" falsevalue="FALSE"/>
                <conditional name="method_cond">
                    <param name="class_method" type="select" label="Select the method for classification">
                        <option value="PLS" selected="True">PLS-DA</option>
                        <option value="OPLS">OPLS-DA</option>
                        <option value="spatialShrunkenCentroids">spatial shrunken centroids</option>
                    </param>
                    <when value="PLS">
                        <conditional name="analysis_cond">
                            <param name="PLS_method" type="select" label="Crossvalidation or analysis">
                                <option value="cvapply" selected="True">cvApply</option>
                                <option value="PLS_analysis">PLS-DA analysis</option>
                            </param>
                            <when value="cvapply">
                                <param name="plscv_comp" type="text" value="1:2" label="The number of PLS-DA components" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5). Minimum is 1.">
                                    <expand macro="sanitizer_multiple_digits"/>
                                </param>
                            </when>
                            <when value="PLS_analysis">
                                <param name="pls_comp" type="integer" value="5" label="The optimal number of PLS-DA components as indicated by cross-validations (minimum is 1)" help="Run cvApply first to obtain optimal number of PLS-DA components"/>
                                <param name="pls_scale" type="boolean" label="Data scaling" truevalue="TRUE" falsevalue="FALSE"/>
                                <param name="PLS_Yweights" type="boolean" label="Y weights" help="Y weights represent the coefficients associated with the response variables and are used to model the relationship between predictors and responses in the context of classification. They represent the importance of each response variable in predicting each component. They can be useful if you have multiple response variables."/>
                                <!--param name="pls_toplabels" type="integer" value="100" label="Number of toplabels (m/z features) which should be written in tabular output"/-->
                            </when>
                        </conditional>
                    </when>
                    <when value="OPLS">
                        <conditional name="opls_analysis_cond">
                            <param name="opls_method" type="select" label="Analysis step to perform">
                                <option value="opls_cvapply" selected="True">cvApply</option>
                                <option value="opls_analysis">OPLS-DA analysis</option>
                            </param>
                            <when value="opls_cvapply">
                                <param name="opls_cvcomp" type="text" value="1:2" label="The number of OPLS-DA components" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5). Minimum is 1.">
                                    <expand macro="sanitizer_multiple_digits"/>
                                </param>
                                <!--param name="xnew_cv" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/-->
                            </when>
                            <when value="opls_analysis">
                                <param name="opls_comp" type="integer" value="5" label="The optimal number of OPLS-DA components as indicated by cross-validations (minimum is 1)" help="Run cvApply first to obtain optimal number of OPLS-DA components"/>
                                <!--param name="xnew" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/-->
                                <param name="opls_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Data scaling"/>
                                <param name="OPLS_Yweights" type="boolean" label="Y weights" help="Y weights represent the coefficients associated with the response variables and are used to model the relationship between predictors and responses in the context of classification. They represent the importance of each response variable in predicting each component.
They can be useful if you have multiple response variables."/>
                                <!--param name="opls_toplabels" type="integer" value="100" label="Number of toplabels (m/z features) which should be written in tabular output"/-->
                            </when>
                        </conditional>
                    </when>
                    <when value="spatialShrunkenCentroids">
                        <!-- SSC: cross validation (cvApply) or a single analysis run -->
                        <conditional name="ssc_analysis_cond">
                            <param name="ssc_method" type="select" label="Analysis step to perform">
                                <option value="ssc_cvapply" selected="True">cvApply</option>
                                <option value="ssc_analysis">spatial shrunken centroids analysis</option>
                            </param>
                            <when value="ssc_cvapply">
                                <param name="write_best_params" type="boolean" label="Write out best r and s values" help="Can be used to generate automatic classification workflow"/>
                                <param name="ssc_cv_accuracy_plot" type="boolean" label="Plot CV accuracy plots on one page (=Yes) or individual pages (=No)"/>
                            </when>
                            <when value="ssc_analysis">
                                <param name="ssc_toplabels" type="integer" value="100" label="Number of toplabels (m/z features) which should be written in tabular output"/>
                            </when>
                        </conditional>
                        <!-- r, s and the kernel method are declared outside ssc_analysis_cond and therefore apply to both SSC steps -->
                        <param name="ssc_r" type="text" value="2" label="The spatial neighborhood radius of nearby pixels to consider (r)" help="For cvapply multiple values are allowed (e.g. 0,1,2,3 or 2:5)">
                            <expand macro="sanitizer_multiple_digits"/>
                        </param>
                        <param name="ssc_s" type="text" value="2" label="The sparsity thresholding parameter by which to shrink the t-statistics (s)." help="For cvapply multiple values are allowed (e.g. 0,1,2 or 2:5)">
                            <expand macro="sanitizer_multiple_digits"/>
                        </param>
                        <param name="ssc_kernel_method" type="select" display="radio" label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights">
                            <option value="gaussian">gaussian</option>
                            <option value="adaptive" selected="True">adaptive</option>
                        </param>
                    </when>
                </conditional>
            </when>
            <when value="prediction">
                <!-- saved result of an earlier training run -->
                <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/>
                <!-- the classifier type has to be stated by the user; only SSC offers a top-features table -->
                <conditional name="classification_type_cond">
                    <param name="classification_type" type="select" label="Which classification method was used">
                        <option value="PLS_classifier" selected="True">PLS classifier</option>
                        <option value="OPLS_classifier">OPLS classifier</option>
                        <option value="SSC_classifier">SSC classifier</option>
                    </param>
                    <when value="PLS_classifier"/>
                    <when value="OPLS_classifier"/>
                    <when value="SSC_classifier">
                        <param name="predicted_toplabels" type="integer" value="100" label="Number of toplabels (m/z features) which should be written in tabular output"/>
                    </when>
                </conditional>
                <!-- optional annotation of the new data, used to score the prediction -->
                <conditional name="new_y_values_cond">
                    <param name="new_y_values" type="select" label="Load annotations (optional, but allows accuracy calculations)">
                        <option value="no_new_response" selected="True">no</option>
                        <option value="new_response">use annotations</option>
                    </param>
                    <when value="no_new_response"/>
                    <when value="new_response">
                        <param name="new_response_file" type="data" format="tabular" label="Load tabular file with pixel coordinates and the new response"/>
                        <param name="column_new_x" data_ref="new_response_file" label="Column with x values" type="data_column"/>
                        <param name="column_new_y" data_ref="new_response_file" label="Column with y values" type="data_column"/>
                        <param name="column_new_response" data_ref="new_response_file" label="Column with new response values" type="data_column"/>
                        <param name="new_tabular_header" type="boolean" label="Tabular files contain a header line" truevalue="TRUE" falsevalue="FALSE"/>
                    </when>
                </conditional>
            </when>
        </conditional>
<conditional name="colour_conditional">
            <!-- plot colours: either a palette function or a manually chosen list -->
            <param name="colour_type" type="select" label="Choose a colour scheme">
                <option value="colourpalette" selected="True">Colour palette</option>
                <option value="manual_colour">Manual selection</option>
            </param>
            <when value="manual_colour">
                <repeat name="colours" title="Colours for the plots" min="1" max="50">
                    <param name="annotation_color" type="color" label="Colours" value="#ff00ff" help="Numbers of colours should be the same as number of components">
                        <sanitizer>
                            <valid initial="string.letters,string.digits">
                                <add value="#" />
                            </valid>
                        </sanitizer>
                    </param>
                </repeat>
            </when>
            <when value="colourpalette">
                <param name="palettes" type="select" display="radio" label="Select a colourpalette">
                    <option value="hue_pal()" selected="True">hue</option>
                    <option value="rainbow">rainbow</option>
                    <option value="heat.colors">heat colors</option>
                    <option value="terrain.colors">terrain colors</option>
                    <option value="topo.colors">topo colors</option>
                    <option value="cm.colors">cm colors</option>
                </param>
            </when>
        </conditional>
        <param name="output_rdata" type="boolean" label="Results as .RData output" help="Can be used to generate a classification prediction on new data"/>
    </inputs>
    <outputs>
        <!-- each filter is a single-line Python expression; 'and' binds tighter than 'or' -->
        <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label="${tool.name} on ${on_string}: results"/>
        <!-- top m/z features: SSC analysis (training) or SSC prediction -->
        <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features">
            <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_analysis' or type_cond['type_method'] == 'prediction' and type_cond['classification_type_cond']['classification_type'] == 'SSC_classifier'</filter>
        </data>
        <!-- per-pixel table: every analysis run (not cvApply) and every prediction -->
        <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels">
            <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'PLS' and type_cond['method_cond']['analysis_cond']['PLS_method'] == 'PLS_analysis' or type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'OPLS' and type_cond['method_cond']['opls_analysis_cond']['opls_method'] == 'opls_analysis' or type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_analysis' or type_cond['type_method'] == 'prediction'</filter>
        </data>
        <!-- coefficients and loadings/weights: PLS and OPLS only -->
        <data format="tabular" name="coefficients" label="${tool.name} on ${on_string}: coefficients">
            <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'PLS' and type_cond['method_cond']['analysis_cond']['PLS_method'] == 'PLS_analysis' or type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'OPLS' and type_cond['method_cond']['opls_analysis_cond']['opls_method'] == 'opls_analysis' or type_cond['type_method'] == 'prediction' and type_cond['classification_type_cond']['classification_type'] == 'PLS_classifier' or type_cond['type_method'] == 'prediction' and type_cond['classification_type_cond']['classification_type'] == 'OPLS_classifier'</filter>
        </data>
        <data format="tabular" name="loadings_weights" label="${tool.name} on ${on_string}: loadings and weights">
            <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'PLS' and type_cond['method_cond']['analysis_cond']['PLS_method'] == 'PLS_analysis' or type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'OPLS' and type_cond['method_cond']['opls_analysis_cond']['opls_method'] == 'opls_analysis' or type_cond['type_method'] == 'prediction' and type_cond['classification_type_cond']['classification_type'] == 'PLS_classifier' or type_cond['type_method'] == 'prediction' and type_cond['classification_type_cond']['classification_type'] == 'OPLS_classifier'</filter>
        </data>
        <!-- best r / s: SSC cvApply with write_best_params switched on; labels now use ": " like all other outputs -->
        <data format="txt" name="best_r" label="${tool.name} on ${on_string}: best r">
            <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_cvapply' and type_cond['method_cond']['ssc_analysis_cond']['write_best_params']</filter>
        </data>
        <data format="txt" name="best_s" label="${tool.name} on ${on_string}: best s">
            <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_cvapply' and type_cond['method_cond']['ssc_analysis_cond']['write_best_params']</filter>
        </data>
        <data format="rdata" name="classification_rdata" label="${tool.name} on ${on_string}: results.RData">
            <filter>output_rdata</filter>
        </data>
    </outputs>
    <tests>
        <!-- test 1: PLS cvApply -->
        <test expect_num_outputs="1">
            <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
            <conditional name="type_cond">
                <param name="type_method" value="training"/>
                <param name="annotation_file" value="pixel_annotation_file1.tabular" ftype="tabular"/>
                <param name="column_x" value="1"/>
                <param name="column_y" value="2"/>
                <param name="column_response" value="4"/>
                <param name="column_fold" value="3"/>
                <param name="tabular_header" value="False"/>
                <conditional name="method_cond">
                    <param name="class_method" value="PLS"/>
                    <conditional name="analysis_cond">
                        <param name="PLS_method" value="cvapply"/>
                        <param name="plscv_comp" value="2:4"/>
                    </conditional>
                </conditional>
            </conditional>
            <output name="classification_images" file="test1.pdf" compare="sim_size" delta="2000"/>
        </test>
        <!-- test 2: PLS analysis -->
        <test expect_num_outputs="5">
            <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
            <conditional name="type_cond">
                <param name="type_method" value="training"/>
                <param name="annotation_file" value="pixel_annotation_file1.tabular" ftype="tabular"/>
                <param name="column_x" value="1"/>
                <param name="column_y"
value="2"/>
                <param name="column_response" value="4"/>
                <param name="tabular_header" value="False"/>
                <conditional name="method_cond">
                    <param name="class_method" value="PLS"/>
                    <conditional name="analysis_cond">
                        <param name="PLS_method" value="PLS_analysis"/>
                        <param name="pls_comp" value="2"/>
                        <param name="pls_scale" value="TRUE"/>
                        <param name="PLS_Yweights" value="TRUE"/>
                        <!--param name="pls_toplabels" value="100"/-->
                    </conditional>
                </conditional>
            </conditional>
            <param name="output_rdata" value="True"/>
            <output name="coefficients">
                <assert_contents>
                    <has_text text="900.004699707031"/>
                    <has_text text="962.870727539062"/>
                    <has_text text="999.606872558594"/>
                </assert_contents>
            </output>
            <output name="loadings_weights">
                <assert_contents>
                    <has_text text="900.076354980469"/>
                    <has_text text="950.495910644531"/>
                    <has_text text="989.024536132812"/>
                </assert_contents>
            </output>
            <output name="pixeloutput" file="pixels_test2.tabular"/>
            <output name="classification_images" file="test2.pdf" compare="sim_size"/>
            <output name="classification_rdata" file="test2.rdata" compare="sim_size"/>
        </test>
        <!-- test 3: OPLS cvApply -->
        <test expect_num_outputs="1">
            <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
            <conditional name="type_cond">
                <param name="type_method" value="training"/>
                <param name="annotation_file" value="random_factors.tabular" ftype="tabular"/>
                <param name="column_x" value="1"/>
                <param name="column_y" value="2"/>
                <param name="column_response" value="4"/>
                <param name="column_fold" value="3"/>
                <param name="tabular_header" value="False"/>
                <conditional name="method_cond">
                    <param name="class_method" value="OPLS"/>
                    <conditional name="opls_analysis_cond">
                        <param name="opls_method" value="opls_cvapply"/>
                        <param name="opls_cvcomp" value="1:2"/>
                    </conditional>
                </conditional>
            </conditional>
            <output name="classification_images" file="test3.pdf" compare="sim_size"/>
        </test>
        <!-- test 4: OPLS analysis -->
        <test expect_num_outputs="5">
            <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
            <conditional name="type_cond">
                <param name="type_method" value="training"/>
                <param name="annotation_file" value="random_factors.tabular" ftype="tabular"/>
                <param name="column_x" value="1"/>
                <param name="column_y" value="2"/>
                <param name="column_response" value="4"/>
                <param name="tabular_header" value="False"/>
                <conditional name="method_cond">
                    <param name="class_method" value="OPLS"/>
                    <conditional name="opls_analysis_cond">
                        <param name="opls_method" value="opls_analysis"/>
                        <param name="opls_comp" value="3"/>
                        <param name="opls_scale" value="FALSE"/>
                        <!-- the OPLS branch declares OPLS_Yweights; PLS_Yweights exists only in the PLS branch -->
                        <param name="OPLS_Yweights" value="FALSE"/>
                    </conditional>
                </conditional>
            </conditional>
            <param name="output_rdata" value="True"/>
            <output name="pixeloutput" file="pixels_test4.tabular"/>
            <output name="coefficients">
                <assert_contents>
                    <has_text text="900.148010253906"/>
                    <has_text text="974.132446289062"/>
                    <has_text text="999.908935546875"/>
                </assert_contents>
            </output>
            <output name="loadings_weights">
                <assert_contents>
                    <has_text text="901.581848144531"/>
                    <has_text text="939.189086914062"/>
                    <has_text text="984.185363769531"/>
                </assert_contents>
            </output>
            <output name="classification_images" file="test4.pdf" compare="sim_size"/>
            <output name="classification_rdata" file="test4.rdata" compare="sim_size"/>
        </test>
        <!-- test 5: SSC cvApply with best r/s written out -->
        <test expect_num_outputs="3">
            <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
            <conditional name="type_cond">
                <param name="type_method" value="training"/>
                <param name="annotation_file" value="pixel_annotation_file1.tabular" ftype="tabular"/>
                <param name="column_x" value="1"/>
                <param name="column_y" value="2"/>
                <param name="column_response" value="3"/>
                <param name="column_fold" value="4"/>
                <param name="tabular_header" value="False"/>
                <conditional name="method_cond">
                    <param name="class_method" value="spatialShrunkenCentroids"/>
                    <conditional name="ssc_analysis_cond">
                        <param name="ssc_method" value="ssc_cvapply"/>
                        <param name="write_best_params" value="TRUE"/>
                    </conditional>
                    <!-- ssc_r, ssc_s and ssc_kernel_method are declared on method_cond, not inside ssc_analysis_cond -->
                    <param name="ssc_r" value="1:2"/>
                    <param name="ssc_s" value="2:3"/>
                    <param name="ssc_kernel_method" value="adaptive"/>
                </conditional>
            </conditional>
            <output name="classification_images" file="test5.pdf" compare="sim_size"/>
            <output name="best_r" file="best_r_test5.txt"/>
            <output name="best_s" file="best_s_test5.txt"/>
        </test>
        <!-- test 6: SSC analysis -->
        <test expect_num_outputs="4">
            <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
            <conditional name="type_cond">
                <param name="type_method" value="training"/>
                <param name="annotation_file" value="random_factors.tabular" ftype="tabular"/>
                <param name="column_x" value="1"/>
                <param name="column_y" value="2"/>
                <param name="column_response" value="4"/>
                <conditional name="method_cond">
                    <param name="class_method" value="spatialShrunkenCentroids"/>
                    <conditional name="ssc_analysis_cond">
                        <param name="ssc_method" value="ssc_analysis"/>
                        <param name="ssc_toplabels" value="20"/>
                    </conditional>
                    <param name="ssc_r" value="2"/>
                    <param name="ssc_s" value="2"/>
                    <param name="ssc_kernel_method" value="adaptive"/>
                </conditional>
            </conditional>
            <param name="output_rdata" value="True"/>
            <output name="mzfeatures" file="features_test6.tabular"/>
            <output name="pixeloutput" file="pixels_test6.tabular"/>
            <output name="classification_images" file="test6.pdf" compare="sim_size"/>
            <output name="classification_rdata" file="test6.rdata" compare="sim_size" delta="15000"/>
        </test>
        <!-- test 7: prediction with the PLS model saved by test 2, scored against annotations -->
        <test expect_num_outputs="5">
            <param name="infile" value="testfile_squares.rdata" ftype="rdata"/>
            <conditional name="type_cond">
                <param name="type_method" value="prediction"/>
                <param name="training_result" value="test2.rdata" ftype="rdata"/>
                <!-- classification_type is declared inside classification_type_cond -->
                <conditional name="classification_type_cond">
                    <param name="classification_type" value="PLS_classifier"/>
                </conditional>
                <conditional name="new_y_values_cond">
                    <param name="new_y_values" value="new_response"/>
                    <param name="new_response_file" value="pixel_annotation_file1.tabular" ftype="tabular"/>
                    <param name="column_new_x" value="1"/>
                    <param name="column_new_y" value="2"/>
                    <param
name="column_new_response" value="4"/> <param name="new_tabular_header" value="False"/> </conditional> </conditional> <param name="output_rdata" value="True"/> <output name="coefficients" file="coefficients_test7.tabular"/> <output name="loadings_weights" file="loadings_and_weights_test7.tabular"/> <output name="pixeloutput" file="pixels_test7.tabular"/> <output name="classification_images" file="test7.pdf" compare="sim_size"/> <output name="classification_rdata" file="test7.rdata" compare="sim_size" /> </test> </tests> <help> <![CDATA[ @CARDINAL_DESCRIPTION@ ----- This tool provides three different Cardinal functions for supervised classification of mass-spectrometry imaging data. @MSIDATA_INPUT_DESCRIPTION@ - NA intensities are not allowed - duplicated coordinates will be removed - For training: tabular file with condition and fold for each pixel: Two columns for pixel coordinates (x and y values); one column with the condition for the pixel, which will be used for classification; for the cross validation (cvapply) another column with a fold is necessary, each fold must contain pixels of all response groups and is used for cross validation. Condition and fold columns are treated as factors to perform discriminant analysis (also when numeric values are provided). :: x_coord y_coord condition fold 1 1 A f1 2 1 A f2 3 1 A f3 1 2 B f1 2 2 B f2 3 2 B f3 ... ... - For prediction: RData output from previous classification run is needed as input, optionally new response values can be loaded with a tabular file containing x values, y values and the response **Options** - PLS-DA: partial least squares discriminant analysis - O-PLS-DA: Orthogonal partial least squares discriminant analysis - Spatial shrunken centroids (more details in `Bemis et al. 
<https://doi.org/10.1074/mcp.O115.053918>`_) - training and prediction - training can be done with cvapply that uses cross validation to find the best values for r and s, this requires not only a condition for each spectrum but also a fold (each fold should contain spectra of all conditions) - training with the best values for r and s gives the top m/z features for each condition and the predicted classification group for each spectrum - training result can be saved as RData file that can be reused for prediction of further samples - prediction can calculate accuracies when the annotations are known and provided .. image:: $PATH_TO_IMAGES/classification_overview.png :width: 1000 :height: 465 **Tips** - The classification function will only run on files with valid intensity values (NA are not allowed) - Only a single input file is accepted, several files have to be combined beforehand, for example with the MSI combine tool. **Output** - PDF with the heatmaps and plots for the classification - Tabular file with information on m/z features and pixels: toplabels/classes - Optional: RData output that can be used to predict new data or to explore the results more deeply with the Cardinal package in R ]]> </help> <expand macro="citations"/> </tool>