Mercurial > repos > galaxyp > cardinal_classification
comparison classification.xml @ 19:4c177985028a draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 91e77c139cb3b7c6d67727dc39140dd79355fa0c
author | galaxyp |
---|---|
date | Thu, 04 Jul 2024 13:45:03 +0000 |
parents | eddc2ae2db80 |
children |
comparison
equal
deleted
inserted
replaced
18:0a18ac48ac53 | 19:4c177985028a |
---|---|
1 <tool id="cardinal_classification" name="MSI classification" version="@VERSION@.0"> | 1 <tool id="cardinal_classification" name="MSI classification" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05"> |
2 <description>spatial classification of mass spectrometry imaging data</description> | 2 <description>spatial classification of mass spectrometry imaging data</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="requirements"> | 6 <expand macro="requirements"/> |
7 <requirement type="package" version="2.3">r-gridextra</requirement> | |
8 <requirement type="package" version="3.3.5">r-ggplot2</requirement> | |
9 </expand> | |
10 <command detect_errors="exit_code"> | 7 <command detect_errors="exit_code"> |
11 <![CDATA[ | 8 <![CDATA[ |
12 | 9 |
13 @INPUT_LINKING@ | 10 @INPUT_LINKING@ |
14 cat '${MSI_segmentation}' && | 11 cat '${MSI_segmentation}' && |
15 Rscript '${MSI_segmentation}' | 12 Rscript '${MSI_segmentation}' |
16 | 13 |
25 library(Cardinal) | 22 library(Cardinal) |
26 library(gridExtra) | 23 library(gridExtra) |
27 library(ggplot2) | 24 library(ggplot2) |
28 library(scales) | 25 library(scales) |
29 | 26 |
27 | |
28 | |
30 @READING_MSIDATA@ | 29 @READING_MSIDATA@ |
31 | 30 |
32 msidata = as(msidata, "MSImageSet") ##coercion to MSImageSet | 31 |
33 | 32 msidata = as(msidata, "MSImagingExperiment") |
34 | 33 |
35 ## remove duplicated coordinates | 34 ## remove duplicated coordinates |
36 msidata <- msidata[,!duplicated(coord(msidata))] | 35 msidata <- msidata[,!duplicated(coord(msidata))] |
37 | 36 |
38 @DATA_PROPERTIES_INRAM@ | 37 @DATA_PROPERTIES_INRAM@ |
60 ################################################################################ | 59 ################################################################################ |
61 | 60 |
62 ## table with values | 61 ## table with values |
63 grid.table(property_df, rows= NULL) | 62 grid.table(property_df, rows= NULL) |
64 | 63 |
65 | 64 int_matrix = as.matrix(spectra(msidata)) |
66 if (npeaks > 0 && sum(is.na(spectra(msidata)))==0){ | 65 NAcount = sum(is.na(int_matrix)) |
66 | |
67 | |
68 if (npeaks > 0 && NAcount==0){ | |
67 | 69 |
68 opar <- par() | 70 opar <- par() |
69 | 71 |
70 ######################## II) Training ####################################### | 72 ######################## II) Training ####################################### |
71 ############################################################################# | 73 ############################################################################# |
73 print("training") | 75 print("training") |
74 | 76 |
75 | 77 |
76 ## load y response (will be needed in every training scenario) | 78 ## load y response (will be needed in every training scenario) |
77 | 79 |
78 y_tabular = read.delim("$type_cond.annotation_file", header = $type_cond.tabular_header, stringsAsFactors = FALSE) | 80 y_tabular = read.delim("$type_cond.annotation_file", header = $type_cond.tabular_header, stringsAsFactors = FALSE) |
79 | 81 |
80 #if str($type_cond.column_fold) == "None": | 82 #if str($type_cond.column_fold) == "None": |
81 y_input = y_tabular[,c($type_cond.column_x, $type_cond.column_y, $type_cond.column_response)] | 83 y_input = y_tabular[,c($type_cond.column_x, $type_cond.column_y, $type_cond.column_response)] |
82 #else | 84 #else |
83 y_input = y_tabular[,c($type_cond.column_x, $type_cond.column_y, $type_cond.column_response, $type_cond.column_fold)] | 85 y_input = y_tabular[,c($type_cond.column_x, $type_cond.column_y, $type_cond.column_response, $type_cond.column_fold)] |
84 #end if | 86 #end if |
85 | 87 colnames(y_input)[1:2] = c("x", "y") |
86 colnames(y_input)[1:2] = c("x", "y") | 88 |
87 ## merge with coordinate information of msidata | 89 ## merge with coordinate information of msidata |
88 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) | 90 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) |
89 colnames(msidata_coordinates)[3] = "pixel_index" | 91 colnames(msidata_coordinates)[3] = "pixel_index" |
90 merged_response = merge(msidata_coordinates, y_input, by=c("x", "y"), all.x=TRUE) | 92 merged_response = as.data.frame(merge(msidata_coordinates, y_input, by=c("x", "y"), all.x=TRUE)) |
91 merged_response[is.na(merged_response)] = "NA" | 93 merged_response[is.na(merged_response)] = "NA" |
92 merged_response = merged_response[order(merged_response\$pixel_index),] | 94 merged_response = merged_response[order(merged_response\$pixel_index),] |
93 conditions = as.factor(merged_response[,4]) | 95 conditions = as.factor(merged_response[,4]) |
94 y_vector = conditions | 96 y_vector = conditions |
95 | 97 |
96 ## colours selection: | 98 ## colours selection: |
97 | 99 |
98 #if str($colour_conditional.colour_type) == "manual_colour" | 100 #if str($colour_conditional.colour_type) == "manual_colour" |
99 #set $color_string = ','.join(['"%s"' % $color.annotation_color for $color in $colour_conditional.colours]) | 101 #set $color_string = ','.join(['"%s"' % $color.annotation_color for $color in $colour_conditional.colours]) |
100 colourvector = c($color_string) | 102 colourvector = c($color_string) |
101 | 103 |
102 #elif str($colour_conditional.colour_type) == "colourpalette" | 104 #elif str($colour_conditional.colour_type) == "colourpalette" |
103 number_levels = (length(levels(conditions))) | 105 number_levels = (length(levels(conditions))) |
104 colourvector = noquote($colour_conditional.palettes)(number_levels) | 106 colourvector = noquote($colour_conditional.palettes)(number_levels) |
105 | 107 |
106 #end if | 108 #end if |
107 | 109 |
108 ## plot of y vector | 110 |
109 | 111 ## plot of y vector |
110 position_df = cbind(coord(msidata)[,1:2], conditions) | 112 |
111 y_plot = ggplot(position_df, aes(x=x, y=y, fill=conditions))+ | 113 position_df = as.data.frame(cbind(coord(msidata)[,1:2], conditions)) |
114 y_plot = ggplot(position_df, aes(x=x, y=y, fill=conditions))+ | |
112 geom_tile() + | 115 geom_tile() + |
113 coord_fixed()+ | 116 coord_fixed()+ |
114 ggtitle("Distribution of the conditions")+ | 117 ggtitle("Distribution of the conditions")+ |
115 theme_bw()+ | 118 theme_bw()+ |
116 theme( | 119 theme( |
117 plot.background = element_blank(), | 120 plot.background = element_blank(), |
118 panel.grid.major = element_blank(), | 121 panel.grid.major = element_blank(), |
119 panel.grid.minor = element_blank())+ | 122 panel.grid.minor = element_blank())+ |
120 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | 123 theme(text=element_text(family="ArialMT", face="bold", size=15))+ |
121 theme(legend.position="bottom",legend.direction="vertical")+ | 124 theme(legend.position="bottom",legend.direction="vertical")+ |
122 guides(fill=guide_legend(ncol=4,byrow=TRUE))+ | 125 guides(fill=guide_legend(ncol=4,byrow=TRUE))+ |
123 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector) | 126 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector) |
124 coord_labels = aggregate(cbind(x,y)~conditions, data=position_df, mean, na.rm=TRUE, na.action="na.pass") | 127 coord_labels = aggregate(cbind(x,y)~conditions, data=position_df, mean, na.rm=TRUE, na.action="na.pass") |
125 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$conditions) | 128 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$conditions) |
126 print(y_plot) | 129 print(y_plot) |
127 | 130 |
128 | 131 ## plot of folds |
129 ## plot of folds | 132 |
130 | 133 #if str($type_cond.column_fold) != "None": |
131 #if str($type_cond.column_fold) != "None": | 134 fold_vector = as.factor(merged_response[,5]) |
132 fold_vector = as.factor(merged_response[,5]) | 135 |
133 | 136 position_df = as.data.frame(cbind(coord(msidata)[,1:2], fold_vector)) |
134 | 137 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+ |
135 position_df = cbind(coord(msidata)[,1:2], fold_vector) | |
136 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+ | |
137 geom_tile() + | 138 geom_tile() + |
138 coord_fixed()+ | 139 coord_fixed()+ |
139 ggtitle("Distribution of the fold variable")+ | 140 ggtitle("Distribution of the fold variable")+ |
140 theme_bw()+ | 141 theme_bw()+ |
141 theme( | 142 theme( |
142 plot.background = element_blank(), | 143 plot.background = element_blank(), |
143 panel.grid.major = element_blank(), | 144 panel.grid.major = element_blank(), |
144 panel.grid.minor = element_blank())+ | 145 panel.grid.minor = element_blank())+ |
145 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | 146 theme(text=element_text(family="ArialMT", face="bold", size=15))+ |
146 theme(legend.position="bottom",legend.direction="vertical")+ | 147 theme(legend.position="bottom",legend.direction="vertical")+ |
147 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | 148 guides(fill=guide_legend(ncol=4,byrow=TRUE)) |
148 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") | 149 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") |
149 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector) | 150 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector) |
150 print(fold_plot) | 151 print(fold_plot) |
151 | 152 |
152 #end if | 153 #end if |
153 | 154 |
154 ######################## PLS ############################# | 155 ######################## PLS ############################# |
155 #if str( $type_cond.method_cond.class_method) == "PLS": | 156 #if str( $type_cond.method_cond.class_method) == "PLS": |
156 print("PLS") | 157 print("PLS") |
157 | 158 |
162 ## set variables for components and number of response groups | 163 ## set variables for components and number of response groups |
163 components = c($type_cond.method_cond.analysis_cond.plscv_comp) | 164 components = c($type_cond.method_cond.analysis_cond.plscv_comp) |
164 number_groups = length(levels(y_vector)) | 165 number_groups = length(levels(y_vector)) |
165 | 166 |
166 ## PLS-cvApply: | 167 ## PLS-cvApply: |
167 msidata.cv.pls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "PLS", ncomp = components) | 168 msidata.cv.pls <- crossValidate(msidata, .y = y_vector, .fold = fold_vector, .fun = "PLS", ncomp = components) |
168 | 169 |
169 ## remove msidata to clean up RAM space | 170 ## remove msidata to clean up RAM space |
170 rm(msidata) | 171 rm(msidata) |
171 gc() | 172 gc() |
172 | 173 |
173 ## create table with summary | 174 ## create new summary table with cv results |
174 count = 1 | 175 results_list <- NULL |
175 summary_plscv = list() | 176 for (i in seq_along(components)) { |
176 accuracy_vector = numeric() | 177 ## extract accuracy, sensitivity, and specificity for the current i |
177 for (iteration in components){ | 178 accuracy <- round(as.data.frame(msidata.cv.pls@resultData@listData[[i]][["accuracy"]]), digits=2) |
178 summary_iteration = summary(msidata.cv.pls)\$accuracy[[paste0("ncomp = ", iteration)]] | 179 sensitivity <- round(as.data.frame(msidata.cv.pls@resultData@listData[[i]][["sensitivity"]]), digits=2) |
179 ## change class of numbers into numeric to round and calculate mean | 180 specificity <- round(as.data.frame(msidata.cv.pls@resultData@listData[[i]][["specificity"]]), digits=2) |
180 summary_iteration2 = round(as.numeric(summary_iteration), digits=2) | 181 |
181 summary_matrix = matrix(summary_iteration2, nrow=4, ncol=number_groups) | 182 ## combine accuracy, sensitivity, and specificity into one data frame |
182 accuracy_vector[count] = mean(summary_matrix[1,]) ## vector with accuracies to find later maximum for plot | 183 result_df <- cbind(folds = rownames(accuracy), ncomp = i, accuracy, sensitivity, specificity) |
183 summary_iteration3 = cbind(rownames(summary_iteration), summary_matrix) ## include rownames in table | 184 colnames(result_df) <- c("folds", "ncomp", "accuracy", "sensitivity", "specificity") |
184 summary_iteration4 = t(summary_iteration3) | 185 rownames(result_df) <- NULL |
185 summary_iteration5 = cbind(c(paste0("ncomp = ", iteration), colnames(summary_iteration)), summary_iteration4) | 186 |
186 summary_plscv[[count]] = summary_iteration5 | 187 ## add column names with ncomp as first row to each dataframe |
187 count = count+1} ## create list with summary table for each component | 188 col_names_row <- data.frame(folds = "folds", ncomp = paste0("ncomp", i), accuracy = "accuracy", sensitivity = "sensitivity", specificity = "specificity") |
188 summary_plscv = do.call(rbind, summary_plscv) | 189 result_df <- rbind(col_names_row, result_df) |
189 summary_df = as.data.frame(summary_plscv) | 190 |
190 colnames(summary_df) = NULL | 191 results_list[[i]] <- result_df |
191 | 192 } |
192 ## plots | 193 |
193 ## plot to find ncomp with highest accuracy | 194 ## combine all data frames in the list into one data frame |
194 plot(components, accuracy_vector, ylab = "mean accuracy",type="o", main="Mean accuracy of PLS classification") | 195 results_df <- do.call(rbind, results_list) |
195 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy | 196 |
196 ## one image for each sample/fold, 4 images per page | 197 summary_df <- results_df |
197 minimumy = min(coord(msidata.cv.pls)[,2]) | 198 |
198 maximumy = max(coord(msidata.cv.pls)[,2]) | 199 ## new table and plot of accuracies over all components |
199 image(msidata.cv.pls, model = list(ncomp = ncomp_max),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout = c(1, 1), col=colourvector) | 200 summary.cv.pls = as.data.frame(summary(msidata.cv.pls)) |
201 plot(0,type='n',axes=FALSE,ann=FALSE) | |
202 summary.cv.pls.round <- round(summary.cv.pls, digits=2) | |
203 grid.table(summary.cv.pls.round, rows=NULL) | |
204 | |
205 accuracy_plot = ggplot(summary.cv.pls, aes(x = ncomp, y = Accuracy)) + | |
206 geom_point(color = "blue", size = 3) + # Add points | |
207 geom_line() + | |
208 theme_bw() | |
209 print(accuracy_plot) | |
200 | 210 |
201 ## print table with summary in pdf | 211 ## print table with summary in pdf |
202 par(opar) | 212 par(opar) |
203 plot(0,type='n',axes=FALSE,ann=FALSE) | 213 plot(0,type='n',axes=FALSE,ann=FALSE) |
204 title(main="Summary for the different components\n", adj=0.5) | 214 title(main="Summary for the different components\n", adj=0.5) |
214 if (maxcount <= nrow(summary_df)){ | 224 if (maxcount <= nrow(summary_df)){ |
215 grid.table(summary_df[mincount:maxcount,], rows= NULL) | 225 grid.table(summary_df[mincount:maxcount,], rows= NULL) |
216 mincount = mincount+20 | 226 mincount = mincount+20 |
217 maxcount = maxcount+20 | 227 maxcount = maxcount+20 |
218 }else{### stop last page with last sample otherwise NA in table | 228 }else{### stop last page with last sample otherwise NA in table |
219 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} | 229 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} |
220 } | 230 } |
221 } | 231 } |
222 | 232 |
223 ## optional output as .RData | 233 ## optional output as .RData |
224 #if $output_rdata: | 234 #if $output_rdata: |
247 ## in case user used multiple inputs for component - this is only possible in cv apply | 257 ## in case user used multiple inputs for component - this is only possible in cv apply |
248 message("Error during PLS training") | 258 message("Error during PLS training") |
249 message("Possible problems: Multiple values for component were selected - this is only possible in cvapply but not for PLS analysis or component was set to 0 but minimum for component is 1)") | 259 message("Possible problems: Multiple values for component were selected - this is only possible in cvapply but not for PLS analysis or component was set to 0 but minimum for component is 1)") |
250 stop(call.=FALSE) | 260 stop(call.=FALSE) |
251 } | 261 } |
252 ) | 262 ) |
253 | 263 |
254 ### pls analysis and coefficients plot | 264 ### pls analysis and coefficients plot |
255 msidata.pls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.analysis_cond.pls_scale) | 265 msidata.pls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.analysis_cond.pls_scale) |
256 plot(msidata.pls, main="PLS coefficients per m/z", col=colourvector) | 266 plot(msidata.pls, main="PLS coefficients per m/z", col=colourvector) |
257 | 267 |
258 ### summary table of PLS | 268 |
259 summary_table = summary(msidata.pls)\$accuracy[[paste0("ncomp = ",component)]] | 269 ## create new summary table |
260 summary_table2 = round(as.numeric(summary_table), digits=2) | 270 summary_df = as.data.frame(summary(msidata.pls)) |
261 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups) | 271 colnames(summary_df) = c("Number of Components", "Accuracy", "Sensitivity", "Specificity") |
262 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table | 272 summary_df = round(summary_df, digits = 2) |
263 summary_table4 = t(summary_table3) | |
264 summary_table5 = cbind(c(paste0("ncomp = ", component), colnames(summary_table)), summary_table4) | |
265 plot(0,type='n',axes=FALSE,ann=FALSE) | 273 plot(0,type='n',axes=FALSE,ann=FALSE) |
266 grid.table(summary_table5, rows= NULL) | 274 grid.table(summary_df, rows= NULL) |
267 | 275 |
268 ### image of the best m/z | 276 ## Yweights plot: represent the importance of each response variable in predicting each component |
269 minimumy = min(coord(msidata)[,2]) | 277 |
270 maximumy = max(coord(msidata)[,2]) | 278 #if $type_cond.method_cond.analysis_cond.PLS_Yweights == "TRUE": |
271 print(image(msidata, mz = topFeatures(msidata.pls)[1,1], normalize.image = "linear", contrast.enhance = "histogram",ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), smooth.image="gaussian", main="best m/z heatmap")) | 279 Yweights = as.data.frame(msidata.pls@resultData@listData[[1]][["Yweights"]]) |
272 | 280 Yweights = round(Yweights, digits = 4) |
273 ### m/z and pixel information output | 281 Yweights.class <- cbind("class" = rownames(Yweights), Yweights) |
274 pls_classes = data.frame(msidata.pls\$classes[[1]]) | 282 |
283 plot(0,type='n',axes=FALSE,ann=FALSE) | |
284 text(x = 0.95, y = 1, "Yweights", cex = 2, font = 2) | |
285 grid.table(Yweights.class, rows= NULL) | |
286 | |
287 #end if | |
288 | |
289 coefficient_plot = plot(msidata.pls, values="coefficients", lwd=2, main = "PLS coefficients per m/z") | |
290 print(coefficient_plot) | |
291 | |
292 ## m/z and pixel information output | |
293 pls_classes = data.frame(msidata.pls@resultData@listData[[1]][["class"]]) | |
294 | |
275 ## pixel names and coordinates | 295 ## pixel names and coordinates |
276 ## to remove potential sample names and z dimension, split at comma and take only x and y | 296 x_coords = msidata_coordinates@listData[["x"]] |
277 x_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 1)) | 297 y_coords = msidata_coordinates@listData[["y"]] |
278 y_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 2)) | 298 pixel_names = paste0("xy_", x_coords, "_", y_coords) |
279 x_coordinates = gsub("x = ","",x_coords) | |
280 y_coordinates = gsub(" y = ","",y_coords) | |
281 pixel_names = paste0("xy_", x_coordinates, "_", y_coordinates) | |
282 | 299 |
283 ## remove msidata to clean up RAM space | 300 ## remove msidata to clean up RAM space |
284 rm(msidata) | 301 rm(msidata) |
285 gc() | 302 gc() |
286 pls_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, pls_classes) | 303 |
287 colnames(pls_classes2) = c("pixel names", "x", "y","predicted condition") | 304 pls_classes2 = data.frame(pixel_names, x_coords, y_coords, pls_classes, y_vector) |
288 pls_toplabels = topFeatures(msidata.pls, n=Inf) | 305 colnames(pls_classes2) = c("pixel_name", "x", "y","predicted_class", "annotated_class") |
289 pls_toplabels[,4:6] <-round(pls_toplabels[,4:6],6) | 306 pls_classes2\$correct <- ifelse(pls_classes2\$predicted_class==pls_classes2\$annotated_class, T, F) |
290 write.table(pls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 307 |
291 write.table(pls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 308 write.table(pls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
292 | 309 |
310 correctness = round(sum(pls_classes2\$correct)/length(pls_classes2\$correct)*100,2) | |
311 | |
312 ## replace topFeatures table with coefficients table | |
313 coefficients.df = as.data.frame(msidata.pls@resultData@listData[[1]][["coefficients"]]) | |
314 row_names <- msidata.pls@featureData@mz | |
315 coefficients.df.rownames <- cbind("mz" = row_names, coefficients.df) | |
316 write.table(coefficients.df.rownames, file = "$coefficients", quote = FALSE, sep = "\t", row.names = FALSE) | |
317 | |
318 ## add loadings and weights table | |
319 loadings.df = as.data.frame(msidata.pls@resultData@listData[[1]][["loadings"]]) | |
320 loadings.df <- cbind("mz" = row_names, loadings.df) | |
321 new_names <- paste0("loadings_", names(loadings.df)[-1]) | |
322 names(loadings.df)[-1] <- new_names | |
323 | |
324 weights.df = as.data.frame(msidata.pls@resultData@listData[[1]][["weights"]]) | |
325 weights.df <- cbind("mz" = row_names, weights.df) | |
326 new_names <- paste0("weights_", names(weights.df)[-1]) | |
327 names(weights.df)[-1] <- new_names | |
328 | |
329 ## combine loading and weights table | |
330 merged.load.wei = merge(loadings.df, weights.df, by = "mz") | |
331 write.table(merged.load.wei, file = "$loadings_weights", quote = FALSE, sep = "\t", row.names = FALSE) | |
332 | |
293 ## image with predicted classes | 333 ## image with predicted classes |
294 prediction_df = cbind(coord(msidata.pls)[,1:2], pls_classes) | 334 prediction_df = as.data.frame(cbind(coord(msidata.pls)[,1:2], pls_classes)) |
295 colnames(prediction_df) = c("x", "y", "predicted_classes") | 335 colnames(prediction_df) = c("x", "y", "predicted_classes") |
296 | 336 |
297 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ | 337 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ |
298 geom_tile() + | 338 geom_tile() + |
299 coord_fixed()+ | 339 coord_fixed()+ |
300 ggtitle("Predicted condition for each pixel")+ | 340 ggtitle("Predicted condition for each pixel")+ |
301 theme_bw()+ | 341 theme_bw()+ |
302 theme( | 342 theme( |
303 plot.background = element_blank(), | 343 plot.background = element_blank(), |
304 panel.grid.major = element_blank(), | 344 panel.grid.major = element_blank(), |
305 panel.grid.minor = element_blank())+ | 345 panel.grid.minor = element_blank())+ |
306 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | 346 theme(text=element_text(family="ArialMT", face="bold", size=15))+ |
307 theme(legend.position="bottom",legend.direction="vertical")+ | 347 theme(legend.position="bottom",legend.direction="vertical")+ |
308 guides(fill=guide_legend(ncol=4,byrow=TRUE))+ | 348 guides(fill=guide_legend(ncol=4,byrow=TRUE))+ |
309 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector) | 349 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector) |
310 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") | 350 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") |
311 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) | 351 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) |
312 print(prediction_plot) | 352 print(prediction_plot) |
353 | |
354 ## correctness plot | |
355 correctness_plot = ggplot(pls_classes2, aes(x=x, y=y, fill=correct))+ | |
356 geom_tile() + | |
357 coord_fixed()+ | |
358 ggtitle(paste0("Correctness of classification: ", correctness, " %"))+ | |
359 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+ | |
360 theme_bw()+ | |
361 theme( | |
362 plot.background = element_blank(), | |
363 panel.grid.major = element_blank(), | |
364 panel.grid.minor = element_blank())+ | |
365 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
366 theme(legend.position="bottom",legend.direction="vertical")+ | |
367 guides(fill=guide_legend(ncol=2,byrow=TRUE)) | |
368 coord_labels = aggregate(cbind(x,y)~correct, data=pls_classes2, mean, na.rm=TRUE, na.action="na.pass") | |
369 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) | |
370 print(correctness_plot) | |
371 | |
313 | 372 |
314 ### optional output as .RData | 373 ### optional output as .RData |
315 #if $output_rdata: | 374 #if $output_rdata: |
316 save(msidata.pls, file="$classification_rdata") | 375 save(msidata.pls, file="$classification_rdata") |
317 #end if | 376 #end if |
329 ## set variables for components and number of response groups | 388 ## set variables for components and number of response groups |
330 components = c($type_cond.method_cond.opls_analysis_cond.opls_cvcomp) | 389 components = c($type_cond.method_cond.opls_analysis_cond.opls_cvcomp) |
331 number_groups = length(levels(y_vector)) | 390 number_groups = length(levels(y_vector)) |
332 | 391 |
333 ## OPLS-cvApply: | 392 ## OPLS-cvApply: |
334 msidata.cv.opls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "OPLS", ncomp = components) | 393 msidata.cv.opls <- crossValidate(msidata, .y = y_vector, .fold = fold_vector, .fun = "OPLS", ncomp = components) |
335 ## for use to reduce msidata: keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew_cv | |
336 | 394 |
337 ## remove msidata to clean up RAM space | 395 ## remove msidata to clean up RAM space |
338 rm(msidata) | 396 rm(msidata) |
339 gc() | 397 gc() |
340 | 398 |
341 ## create table with summary | 399 |
342 count = 1 | 400 |
343 summary_oplscv = list() | 401 ## new table with cv results to replace the old summary table |
344 accuracy_vector = numeric() | 402 results_list <- NULL |
345 for (iteration in components){ | 403 for (i in seq_along(components)) { |
346 | 404 ## extract accuracy, sensitivity, and specificity for the current i |
347 summary_iteration = summary(msidata.cv.opls)\$accuracy[[paste0("ncomp = ", iteration)]] | 405 accuracy <- round(as.data.frame(msidata.cv.opls@resultData@listData[[i]][["accuracy"]]), digits=2) |
348 ## change class of numbers into numeric to round and calculate mean | 406 sensitivity <- round(as.data.frame(msidata.cv.opls@resultData@listData[[i]][["sensitivity"]]), digits=2) |
349 summary_iteration2 = round(as.numeric(summary_iteration), digits=2) | 407 specificity <- round(as.data.frame(msidata.cv.opls@resultData@listData[[i]][["specificity"]]), digits=2) |
350 summary_matrix = matrix(summary_iteration2, nrow=4, ncol=number_groups) | 408 |
351 accuracy_vector[count] = mean(summary_matrix[1,]) ## vector with accuracies to find later maximum for plot | 409 ## combine accuracy, sensitivity, and specificity into one data frame |
352 summary_iteration3 = cbind(rownames(summary_iteration), summary_matrix) ## include rownames in table | 410 result_df <- cbind(folds = rownames(accuracy), ncomp = i, accuracy, sensitivity, specificity) |
353 summary_iteration4 = t(summary_iteration3) | 411 colnames(result_df) <- c("folds", "ncomp", "accuracy", "sensitivity", "specificity") |
354 summary_iteration5 = cbind(c(paste0("ncomp = ", iteration), colnames(summary_iteration)), summary_iteration4) | 412 rownames(result_df) <- NULL |
355 summary_oplscv[[count]] = summary_iteration5 | 413 |
356 count = count+1} ## create list with summary table for each component | 414 ## add column names with ncomp as first row to each dataframe |
357 summary_oplscv = do.call(rbind, summary_oplscv) | 415 col_names_row <- data.frame(folds = "folds", ncomp = paste0("ncomp", i), accuracy = "accuracy", sensitivity = "sensitivity", specificity = "specificity") |
358 summary_df = as.data.frame(summary_oplscv) | 416 result_df <- rbind(col_names_row, result_df) |
359 colnames(summary_df) = NULL | 417 |
360 | 418 results_list[[i]] <- result_df |
361 ## plots | 419 } |
362 ## plot to find ncomp with highest accuracy | 420 |
363 plot(components, accuracy_vector, ylab = "mean accuracy", type="o", main="Mean accuracy of OPLS classification") | 421 ## combine all data frames in the list into one data frame |
364 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy | 422 results_df <- do.call(rbind, results_list) |
365 ## one image for each sample/fold, 4 images per page | 423 |
366 minimumy = min(coord(msidata.cv.opls)[,2]) | 424 summary_df <- results_df |
367 maximumy = max(coord(msidata.cv.opls)[,2]) | 425 |
368 image(msidata.cv.opls, model = list(ncomp = ncomp_max),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout = c(1, 1), col=colourvector) | 426 ## new table and plot of accuracies over all components |
427 summary.cv.opls = as.data.frame(summary(msidata.cv.opls)) | |
428 | |
429 ## table with values | |
430 plot(0,type='n',axes=FALSE,ann=FALSE) | |
431 summary.cv.opls.round <- round(summary.cv.opls, digits=2) | |
432 grid.table(summary.cv.opls.round, rows=NULL) | |
433 | |
434 accuracy_plot = ggplot(summary.cv.opls, aes(x = ncomp, y = Accuracy)) + | |
435 geom_point(color = "blue", size = 3) + # Add points | |
436 geom_line() + | |
437 theme_bw() | |
438 print(accuracy_plot) | |
369 | 439 |
370 ## print table with summary in pdf | 440 ## print table with summary in pdf |
371 par(opar) | 441 par(opar) |
372 plot(0,type='n',axes=FALSE,ann=FALSE) | 442 plot(0,type='n',axes=FALSE,ann=FALSE) |
373 title(main="Summary for the different components\n", adj=0.5) | 443 title(main="Summary for the different components\n", adj=0.5) |
383 if (maxcount <= nrow(summary_df)){ | 453 if (maxcount <= nrow(summary_df)){ |
384 grid.table(summary_df[mincount:maxcount,], rows= NULL) | 454 grid.table(summary_df[mincount:maxcount,], rows= NULL) |
385 mincount = mincount+20 | 455 mincount = mincount+20 |
386 maxcount = maxcount+20 | 456 maxcount = maxcount+20 |
387 }else{### stop last page with last sample otherwise NA in table | 457 }else{### stop last page with last sample otherwise NA in table |
388 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} | 458 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} |
389 } | 459 } |
390 } | 460 } |
391 | 461 |
392 ## optional output as .RData | 462 ## optional output as .RData |
393 #if $output_rdata: | 463 #if $output_rdata: |
416 ## in case user used multiple inputs for component - this is only possible in cv apply | 486 ## in case user used multiple inputs for component - this is only possible in cv apply |
417 message("Error during OPLS training") | 487 message("Error during OPLS training") |
418 message("Possible problems: Multiple values for component were selected - this is only possible in cvapply but not for OPLS analysis or component was set to 0 but minimum for component is 1)") | 488 message("Possible problems: Multiple values for component were selected - this is only possible in cvapply but not for OPLS analysis or component was set to 0 but minimum for component is 1)") |
419 stop(call.=FALSE) | 489 stop(call.=FALSE) |
420 } | 490 } |
421 ) | 491 ) |
422 | 492 |
423 ### opls analysis and coefficients plot | 493 ### opls analysis and coefficients plot |
424 msidata.opls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.opls_analysis_cond.opls_scale) | 494 msidata.opls <- OPLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.opls_analysis_cond.opls_scale) |
425 ## to reduce msidata: keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew | |
426 plot(msidata.opls, main="OPLS coefficients per m/z", col=colourvector) | 495 plot(msidata.opls, main="OPLS coefficients per m/z", col=colourvector) |
427 | 496 |
428 | 497 ## create new summary table |
429 ### summary table of OPLS | 498 summary_df = as.data.frame(summary(msidata.opls)) |
430 summary_table = summary(msidata.opls)\$accuracy[[paste0("ncomp = ",component)]] | 499 colnames(summary_df) = c("Number of Components", "Accuracy", "Sensitivity", "Specificity") |
431 summary_table2 = round(as.numeric(summary_table), digits=2) | 500 summary_df = round(summary_df, digits = 2) |
432 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups) | |
433 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table | |
434 summary_table4 = t(summary_table3) | |
435 summary_table5 = cbind(c(paste0("ncomp = ", component), colnames(summary_table)), summary_table4) | |
436 plot(0,type='n',axes=FALSE,ann=FALSE) | 501 plot(0,type='n',axes=FALSE,ann=FALSE) |
437 grid.table(summary_table5, rows= NULL) | 502 grid.table(summary_df, rows= NULL) |
438 | 503 |
439 ### image of the best m/z | 504 |
440 minimumy = min(coord(msidata)[,2]) | 505 #if $type_cond.method_cond.opls_analysis_cond.OPLS_Yweights == "TRUE": |
441 maximumy = max(coord(msidata)[,2]) | 506 ## Yweights plot: represent the importance of each response variable in predicting each component |
442 print(image(msidata, mz = topFeatures(msidata.opls)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), main="best m/z heatmap")) | 507 Yweights = as.data.frame(msidata.opls@resultData@listData[[1]][["Yweights"]]) |
443 | 508 Yweights = round(Yweights, digits = 4) |
444 opls_classes = data.frame(msidata.opls\$classes[[1]]) | 509 Yweights.class <- cbind("class" = rownames(Yweights), Yweights) |
510 | |
511 plot(0,type='n',axes=FALSE,ann=FALSE) | |
512 text(x = 0.95, y = 1, "Yweights", cex = 2, font = 2) | |
513 grid.table(Yweights.class, rows= NULL) | |
514 #end if | |
515 | |
516 coefficient_plot = plot(msidata.opls, values="coefficients", lwd=2, main = "OPLS coefficients per m/z") | |
517 print(coefficient_plot) | |
518 | |
519 ## m/z and pixel information output | |
520 opls_classes = data.frame(msidata.opls@resultData@listData[[1]][["class"]]) | |
521 | |
445 ## pixel names and coordinates | 522 ## pixel names and coordinates |
446 ## to remove potential sample names and z dimension, split at comma and take only x and y | 523 x_coords = msidata_coordinates@listData[["x"]] |
447 x_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 1)) | 524 y_coords = msidata_coordinates@listData[["y"]] |
448 y_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 2)) | 525 pixel_names = paste0("xy_", x_coords, "_", y_coords) |
449 x_coordinates = gsub("x = ","",x_coords) | 526 |
450 y_coordinates = gsub(" y = ","",y_coords) | 527 opls_classes2 = data.frame(pixel_names, x_coords, y_coords, opls_classes, y_vector) |
451 pixel_names = paste0("xy_", x_coordinates, "_", y_coordinates) | 528 colnames(opls_classes2) = c("pixel names", "x", "y","predicted_class", "annotated_class") |
452 | 529 opls_classes2\$correct <- ifelse(opls_classes2\$predicted_class == opls_classes2\$annotated_class, T, F) |
453 opls_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, opls_classes) | 530 |
454 colnames(opls_classes2) = c("pixel names", "x", "y","predicted condition") | 531 write.table(opls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
532 | |
533 correctness = round(sum(opls_classes2\$correct)/length(opls_classes2\$correct)*100,2) | |
455 | 534 |
456 ## remove msidata to clean up RAM space | 535 ## remove msidata to clean up RAM space |
457 rm(msidata) | 536 rm(msidata) |
458 gc() | 537 gc() |
459 | 538 |
460 opls_toplabels = topFeatures(msidata.opls, n=Inf) | 539 ## replace topFeatures table with coefficients table |
461 opls_toplabels[,4:6] <-round(opls_toplabels[,4:6],6) | 540 coefficients.df = as.data.frame(msidata.opls@resultData@listData[[1]][["coefficients"]]) |
462 write.table(opls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 541 row_names <- msidata.opls@featureData@mz |
463 write.table(opls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 542 coefficients.df.rownames <- cbind("mz" = row_names, coefficients.df) |
543 write.table(coefficients.df.rownames, file = "$coefficients", quote = FALSE, sep = "\t", row.names = FALSE) | |
544 | |
545 ## add loadings and weights table | |
546 loadings.df = as.data.frame(msidata.opls@resultData@listData[[1]][["loadings"]]) | |
547 loadings.df <- cbind("mz" = row_names, loadings.df) | |
548 new_names <- paste0("loadings_", names(loadings.df)[-1]) | |
549 names(loadings.df)[-1] <- new_names | |
550 | |
551 weights.df = as.data.frame(msidata.opls@resultData@listData[[1]][["weights"]]) | |
552 weights.df <- cbind("mz" = row_names, weights.df) | |
553 new_names <- paste0("weights_", names(weights.df)[-1]) | |
554 names(weights.df)[-1] <- new_names | |
555 | |
556 ## combine loading and weights table | |
557 merged.load.wei = merge(loadings.df, weights.df, by = "mz") | |
558 write.table(merged.load.wei, file = "$loadings_weights", quote = FALSE, sep = "\t", row.names = FALSE) | |
464 | 559 |
465 ## image with predicted classes | 560 ## image with predicted classes |
466 prediction_df = cbind(coord(msidata.opls)[,1:2], opls_classes) | 561 prediction_df = as.data.frame(cbind(coord(msidata.opls)[,1:2], opls_classes)) |
467 colnames(prediction_df) = c("x", "y", "predicted_classes") | 562 colnames(prediction_df) = c("x", "y", "predicted_classes") |
468 | 563 |
469 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ | 564 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ |
470 geom_tile() + | 565 geom_tile() + |
471 coord_fixed()+ | 566 coord_fixed()+ |
472 ggtitle("Predicted condition for each pixel")+ | 567 ggtitle("Predicted condition for each pixel")+ |
473 theme_bw()+ | 568 theme_bw()+ |
474 theme( | 569 theme( |
475 plot.background = element_blank(), | 570 plot.background = element_blank(), |
476 panel.grid.major = element_blank(), | 571 panel.grid.major = element_blank(), |
477 panel.grid.minor = element_blank())+ | 572 panel.grid.minor = element_blank())+ |
478 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | 573 theme(text=element_text(family="ArialMT", face="bold", size=15))+ |
479 theme(legend.position="bottom",legend.direction="vertical")+ | 574 theme(legend.position="bottom",legend.direction="vertical")+ |
480 guides(fill=guide_legend(ncol=4,byrow=TRUE))+ | 575 guides(fill=guide_legend(ncol=4,byrow=TRUE))+ |
481 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector) | 576 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector) |
482 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") | 577 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") |
483 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) | 578 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) |
484 print(prediction_plot) | 579 print(prediction_plot) |
485 | 580 |
486 ## optional output as .RData | 581 ## correctness plot |
487 #if $output_rdata: | 582 correctness_plot = ggplot(opls_classes2, aes(x=x, y=y, fill=correct))+ |
583 geom_tile() + | |
584 coord_fixed()+ | |
585 ggtitle(paste0("Correctness of classification: ", correctness, " %"))+ | |
586 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+ | |
587 theme_bw()+ | |
588 theme( | |
589 plot.background = element_blank(), | |
590 panel.grid.major = element_blank(), | |
591 panel.grid.minor = element_blank())+ | |
592 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
593 theme(legend.position="bottom",legend.direction="vertical")+ | |
594 guides(fill=guide_legend(ncol=2,byrow=TRUE)) | |
595 coord_labels = aggregate(cbind(x,y)~correct, data=opls_classes2, mean, na.rm=TRUE, na.action="na.pass") | |
596 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) | |
597 print(correctness_plot) | |
598 | |
599 | |
600 ## optional output as .RData | |
601 #if $output_rdata: | |
488 save(msidata.opls, file="$classification_rdata") | 602 save(msidata.opls, file="$classification_rdata") |
489 #end if | 603 #end if |
490 #end if | 604 #end if |
491 | 605 |
492 | 606 |
493 ######################## SSC ############################# | 607 ######################## SSC ############################# |
494 #elif str( $type_cond.method_cond.class_method) == "spatialShrunkenCentroids": | 608 #elif str( $type_cond.method_cond.class_method) == "spatialShrunkenCentroids": |
500 | 614 |
501 ## set variables for components and number of response groups | 615 ## set variables for components and number of response groups |
502 number_groups = length(levels(y_vector)) | 616 number_groups = length(levels(y_vector)) |
503 | 617 |
504 ## SSC-cvApply: | 618 ## SSC-cvApply: |
505 msidata.cv.ssc <- cvApply(msidata, .y = y_vector,.fold = fold_vector,.fun = "spatialShrunkenCentroids", r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") | 619 msidata.cv.ssc <- crossValidate(msidata, .y = y_vector,.fold = fold_vector,.fun = "spatialShrunkenCentroids", r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") |
620 | |
506 | 621 |
507 ## remove msidata to clean up RAM space | 622 ## remove msidata to clean up RAM space |
508 rm(msidata) | 623 rm(msidata) |
509 gc() | 624 gc() |
510 | 625 |
511 ## create table with summary | 626 ## new table and plot of accuracies over all components |
512 count = 1 | 627 summary.cv.ssc = as.data.frame(summary(msidata.cv.ssc)) |
513 summary_ssccv = list() | 628 summary.cv.ssc.round <- round(summary.cv.ssc, digits=2) |
514 accuracy_vector = numeric() | 629 |
515 iteration_vector = character() | |
516 for (iteration in names(msidata.cv.ssc@resultData[[1]][,1])){ | |
517 | |
518 summary_iteration = summary(msidata.cv.ssc)\$accuracy[[iteration]] | |
519 ## change class of numbers into numeric to round and calculate mean | |
520 summary_iteration2 = round(as.numeric(summary_iteration), digits=2) | |
521 summary_matrix = matrix(summary_iteration2, nrow=4, ncol=number_groups) | |
522 accuracy_vector[count] = mean(summary_matrix[1,]) ## vector with accuracies to find later maximum for plot | |
523 summary_iteration3 = cbind(rownames(summary_iteration), summary_matrix) ## include rownames in table | |
524 summary_iteration4 = t(summary_iteration3) | |
525 summary_iteration5 = cbind(c(iteration, colnames(summary_iteration)), summary_iteration4) | |
526 summary_ssccv[[count]] = summary_iteration5 | |
527 iteration_vector[count] = unlist(strsplit(iteration, "[,]"))[3] | |
528 count = count+1} ## create list with summary table for each component | |
529 summary_ssccv = do.call(rbind, summary_ssccv) | |
530 summary_df = as.data.frame(summary_ssccv) | |
531 colnames(summary_df) = NULL | |
532 | |
533 ## plot to find parameters with highest accuracy | |
534 plot(c($type_cond.method_cond.ssc_s),accuracy_vector[!duplicated(iteration_vector)], type="o",ylab="Mean accuracy", xlab = "Shrinkage parameter (s)", main="Mean accuracy of SSC classification") | |
535 best_params = names(msidata.cv.ssc@resultData[[1]][,1])[which.max(accuracy_vector)] ## find parameters with max. accuracy | |
536 r_value = as.numeric(substring(unlist(strsplit(best_params, ","))[1], 4)) | |
537 s_value = as.numeric(substring(unlist(strsplit(best_params, ","))[3], 5)) ## remove space | |
538 minimumy = min(coord(msidata.cv.ssc)[,2]) | |
539 maximumy = max(coord(msidata.cv.ssc)[,2]) | |
540 image(msidata.cv.ssc, model = list( r = r_value, s = s_value ), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy),layout=c(1,1), col=colourvector) | |
541 | |
542 #if $type_cond.method_cond.ssc_analysis_cond.write_best_params: | |
543 write.table(r_value, file="$best_r", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | |
544 write.table(s_value, file="$best_s", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | |
545 #end if | |
546 | |
547 ## print table with summary in pdf | |
548 par(opar) | 630 par(opar) |
549 plot(0,type='n',axes=FALSE,ann=FALSE) | 631 plot(0,type='n',axes=FALSE,ann=FALSE) |
550 title(main="Summary for the different parameters\n", adj=0.5) | 632 title(main="Summary for the different parameters\n", adj=0.5) |
633 ## 20 rows fits in one page: | |
634 if (nrow(summary.cv.ssc.round)<=20){ | |
635 grid.table(summary.cv.ssc.round, rows= NULL) | |
636 }else{ | |
637 grid.table(summary.cv.ssc.round[1:20,], rows= NULL) | |
638 mincount = 21 | |
639 maxcount = 40 | |
640 for (count20 in 1:(ceiling(nrow(summary.cv.ssc.round)/20)-1)){ | |
641 plot(0,type='n',axes=FALSE,ann=FALSE) | |
642 if (maxcount <= nrow(summary.cv.ssc.round)){ | |
643 grid.table(summary.cv.ssc.round[mincount:maxcount,], rows= NULL) | |
644 mincount = mincount+20 | |
645 maxcount = maxcount+20 | |
646 }else{### stop last page with last sample otherwise NA in table | |
647 grid.table(summary.cv.ssc.round[mincount:nrow(summary.cv.ssc.round),], rows= NULL)} | |
648 } | |
649 } | |
650 | |
651 ## new accuracy plots | |
652 #if $type_cond.method_cond.ssc_analysis_cond.ssc_cv_accuracy_plot == "TRUE": | |
653 accuracy_plot = ggplot(summary.cv.ssc, aes(x = s, y = Accuracy)) + | |
654 geom_point(color = "blue", size = 3) + # Add points | |
655 geom_line() + | |
656 theme_bw() + | |
657 facet_wrap(~ r) | |
658 | |
659 print(accuracy_plot) | |
660 | |
661 ## or as alternative accuracy plot for each r value on own page: | |
662 #elif $type_cond.method_cond.ssc_analysis_cond.ssc_cv_accuracy_plot == "FALSE": | |
663 unique_r_values <- unique(summary.cv.ssc\$r) | |
664 | |
665 for (r_value in unique_r_values) { | |
666 ## Create a subset for the current value of r | |
667 plot_data <- subset(summary.cv.ssc, r == r_value) | |
668 ## Create the accuracy plot for the current value of r | |
669 accuracy_plot <- ggplot(plot_data, aes(x = s, y = Accuracy)) + | |
670 geom_point(color = "blue", size = 3) + # Add points | |
671 geom_line() + | |
672 theme_bw() + | |
673 ggtitle(paste("Plot for r =", r_value)) + # Add a title | |
674 theme(plot.title = element_text(hjust = 0.5)) # Center the title | |
675 print(accuracy_plot) | |
676 } | |
677 #end if | |
678 | |
679 ## table with cv values per fold group for each combination of r and s | |
680 r_s_df = as.data.frame(msidata.cv.ssc@modelData@listData) | |
681 r_s_df\$parameter = paste0("r=", r_s_df\$r, " and s=", r_s_df\$s) | |
682 iteration = seq_along(r_s_df\$parameter) | |
683 | |
684 results_list <- NULL | |
685 for (i in iteration) { | |
686 ## extract accuracy, sensitivity, and specificity for the current i | |
687 accuracy <- round(as.data.frame(msidata.cv.ssc@resultData@listData[[i]][["accuracy"]]), digits=2) | |
688 sensitivity <- round(as.data.frame(msidata.cv.ssc@resultData@listData[[i]][["sensitivity"]]), digits=2) | |
689 specificity <- round(as.data.frame(msidata.cv.ssc@resultData@listData[[i]][["specificity"]]), digits=2) | |
690 | |
691 ## combine accuracy, sensitivity, and specificity into one data frame | |
692 result_df <- cbind(folds = rownames(accuracy), parameter = r_s_df\$parameter[i], accuracy, sensitivity, specificity) | |
693 colnames(result_df) <- c("folds", "parameter", "accuracy", "sensitivity", "specificity") | |
694 rownames(result_df) <- NULL | |
695 | |
696 ## add column names as first row to each dataframe | |
697 col_names_row <- data.frame(folds = "folds", parameter = "parameter", accuracy = "accuracy", sensitivity = "sensitivity", specificity = "specificity") | |
698 result_df <- rbind(col_names_row, result_df) | |
699 | |
700 results_list[[i]] <- result_df | |
701 } | |
702 | |
703 ## combine all data frames in the list into one data frame | |
704 results_df <- do.call(rbind, results_list) | |
705 summary_df <- results_df | |
706 | |
707 par(opar) | |
708 plot(0,type='n',axes=FALSE,ann=FALSE) | |
709 title(main="More advanced folds output table: \n Summary for each fold\n", adj=0.5) | |
551 ## 20 rows fits in one page: | 710 ## 20 rows fits in one page: |
552 if (nrow(summary_df)<=20){ | 711 if (nrow(summary_df)<=20){ |
553 grid.table(summary_df, rows= NULL) | 712 grid.table(summary_df, rows= NULL) |
554 }else{ | 713 }else{ |
555 grid.table(summary_df[1:20,], rows= NULL) | 714 grid.table(summary_df[1:20,], rows= NULL) |
560 if (maxcount <= nrow(summary_df)){ | 719 if (maxcount <= nrow(summary_df)){ |
561 grid.table(summary_df[mincount:maxcount,], rows= NULL) | 720 grid.table(summary_df[mincount:maxcount,], rows= NULL) |
562 mincount = mincount+20 | 721 mincount = mincount+20 |
563 maxcount = maxcount+20 | 722 maxcount = maxcount+20 |
564 }else{### stop last page with last sample otherwise NA in table | 723 }else{### stop last page with last sample otherwise NA in table |
565 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} | 724 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} |
566 } | 725 } |
567 } | 726 } |
727 | |
728 | |
729 ## new code to extract best r and s values | |
730 max_accuracy_index <- which.max(summary.cv.ssc\$Accuracy) | |
731 | |
732 ## extract the corresponding values of "r" and "s" | |
733 highest_accuracy_r <- summary.cv.ssc\$r[max_accuracy_index] | |
734 highest_accuracy_s <- summary.cv.ssc\$s[max_accuracy_index] | |
735 | |
736 #if $type_cond.method_cond.ssc_analysis_cond.write_best_params: | |
737 write.table(highest_accuracy_r, file="$best_r", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | |
738 write.table(highest_accuracy_s, file="$best_s", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | |
739 #end if | |
568 | 740 |
569 ## optional output as .RData | 741 ## optional output as .RData |
570 #if $output_rdata: | 742 #if $output_rdata: |
571 save(msidata.cv.ssc, file="$classification_rdata") | 743 save(msidata.cv.ssc, file="$classification_rdata") |
572 #end if | 744 #end if |
577 | 749 |
578 ## set variables for components and number of response groups | 750 ## set variables for components and number of response groups |
579 number_groups = length(levels(y_vector)) | 751 number_groups = length(levels(y_vector)) |
580 | 752 |
581 ## SSC analysis and plot | 753 ## SSC analysis and plot |
582 msidata.ssc <- spatialShrunkenCentroids(msidata, y = y_vector, | 754 msidata.ssc <- spatialShrunkenCentroids(msidata, y = y_vector, r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") |
583 r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") | 755 print(plot(msidata.ssc, values = "statistic", model = list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)), col=colourvector, lwd=2)) |
584 plot(msidata.ssc, mode = "tstatistics", model = list("r" = c($type_cond.method_cond.ssc_r), "s" = c($type_cond.method_cond.ssc_s)), | |
585 col=colourvector, lwd=2) | |
586 | |
587 ### summary table SSC | |
588 ##############summary_table = summary(msidata.ssc) | |
589 | 756 |
590 ### stop if multiple values for r and s were used as input | 757 ### stop if multiple values for r and s were used as input |
591 tryCatch( | 758 tryCatch( |
592 { | 759 { |
593 | 760 |
600 ## in case user used multiple inputs for r or s stop - this is only possible in cv apply | 767 ## in case user used multiple inputs for r or s stop - this is only possible in cv apply |
601 message("Error during SSC training") | 768 message("Error during SSC training") |
602 message("Possible problem: multiple values for r or s selected - this is only possible in cvapply but not for spatial shrunken centroid analysis)") | 769 message("Possible problem: multiple values for r or s selected - this is only possible in cvapply but not for spatial shrunken centroid analysis)") |
603 stop(call.=FALSE) | 770 stop(call.=FALSE) |
604 } | 771 } |
605 ) | 772 ) |
606 | 773 |
607 summary_table = summary(msidata.ssc)\$accuracy[[names(msidata.ssc@resultData)]] | 774 summary_df = as.data.frame(summary(msidata.ssc)) |
608 summary_table2 = round(as.numeric(summary_table), digits=2) | 775 summary_df = round(summary_df, digits=3) |
609 summary_matrix = matrix(summary_table2, nrow=4, ncol=number_groups) | 776 colnames(summary_df) = c("Radius r", "Shrinkage s", "Features/Class", "Accuracy", "Sensitivity", "Specificity") |
610 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table | |
611 summary_table4 = t(summary_table3) | |
612 summary_table5 = cbind(c(names(msidata.ssc@resultData),colnames(summary_table)), summary_table4) | |
613 plot(0,type='n',axes=FALSE,ann=FALSE) | 777 plot(0,type='n',axes=FALSE,ann=FALSE) |
614 grid.table(summary_table5, rows= NULL) | 778 grid.table(summary_df, rows= NULL) |
615 | 779 |
616 ### image of the best m/z | 780 ## image of the best m/z |
617 minimumy = min(coord(msidata)[,2]) | 781 minimumy = min(coord(msidata)[,2]) |
618 maximumy = max(coord(msidata)[,2]) | 782 maximumy = max(coord(msidata)[,2]) |
619 print(image(msidata, mz = topFeatures(msidata.ssc)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), main="best m/z heatmap")) | 783 print(image(msidata, mz = topFeatures(msidata.ssc)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), main="best m/z heatmap")) |
620 | 784 |
621 ## m/z and pixel information output | 785 ## m/z and pixel information output |
622 ssc_classes = data.frame(msidata.ssc\$classes[[1]]) | 786 x_coords = msidata_coordinates@listData[["x"]] |
623 ssc_probabilities = data.frame(msidata.ssc\$probabilities[[1]]) | 787 y_coords = msidata_coordinates@listData[["y"]] |
624 | 788 pixel_names = paste0("xy_", x_coords, "_", y_coords) |
625 ## pixel names and coordinates | |
626 ## to remove potential sample names and z dimension, split at comma and take only x and y | |
627 x_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 1)) | |
628 y_coords = unlist(lapply(strsplit(names(pixels(msidata)), ","), `[[`, 2)) | |
629 x_coordinates = gsub("x = ","",x_coords) | |
630 y_coordinates = gsub(" y = ","",y_coords) | |
631 pixel_names = paste0("xy_", x_coordinates, "_", y_coordinates) | |
632 | 789 |
633 | 790 |
634 ## remove msidata to clean up RAM space | 791 ## remove msidata to clean up RAM space |
635 rm(msidata) | 792 rm(msidata) |
636 gc() | 793 gc() |
637 | 794 |
638 ssc_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, ssc_classes, ssc_probabilities) | 795 |
639 colnames(ssc_classes2) = c("pixel names", "x", "y","predicted condition", levels(msidata.ssc\$classes[[1]])) | 796 ## toplabel (m/z features output) |
640 ssc_toplabels = topFeatures(msidata.ssc, n=Inf) | 797 ssc_toplabels = topFeatures(msidata.ssc, n=$type_cond.method_cond.ssc_toplabels) |
641 ssc_toplabels[,6:9] <-round(ssc_toplabels[,6:9],6) | 798 ssc_toplabels@listData[["centers"]] = round (ssc_toplabels@listData[["centers"]], digits = 6) |
799 ssc_toplabels@listData[["statistic"]] = round (ssc_toplabels@listData[["statistic"]], digits = 6) | |
642 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 800 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
801 | |
802 print(image(msidata.ssc, model=list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector, values="class", layout=c(1,1), main="Class Prediction")) | |
803 print(image(msidata.ssc, model=list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector, values="probability", layout=c(1,1), main="Class Probabilities")) | |
804 | |
805 | |
806 ## pixel output with correctness | |
807 ssc_classes = data.frame(msidata.ssc@resultData@listData[[1]][["class"]]) | |
808 colnames(ssc_classes) = "predicted_class" | |
809 ssc_classes\$predicted_class = ifelse(is.na(ssc_classes\$predicted_class), "NA", as.character(ssc_classes\$predicted_class)) | |
810 ssc_probabilities = data.frame(msidata.ssc@resultData@listData[[1]][["probability"]]) | |
811 | |
812 | |
813 ssc_classes2 = data.frame(pixel_names, x_coords, y_coords, ssc_classes, ssc_probabilities, y_vector) | |
814 colnames(ssc_classes2) = c("pixel_names", "x", "y","predicted_classes", levels(msidata.ssc@resultData@listData[[1]][["class"]]), "annotated_class") | |
815 ssc_classes2\$correct<- ifelse(ssc_classes2\$predicted_classes==ssc_classes2\$annotated_class, T, F) | |
816 correctness = round(sum(ssc_classes2\$correct, na.rm = TRUE)/length(ssc_classes2\$correct)*100,2) | |
817 | |
643 write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 818 write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
644 | 819 |
645 image(msidata.ssc, model=list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), | 820 correctness_plot = ggplot(ssc_classes2, aes(x=x, y=y, fill=correct))+ |
646 col=colourvector, mode="classes", layout=c(1,1), main="Class Prediction") | |
647 image(msidata.ssc, model=list(r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s)), ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), | |
648 col=colourvector, mode="probabilities", layout=c(1,1), main="Class probabilities") | |
649 | |
650 | |
651 ## image with right and wrong classes: | |
652 prediction_df = cbind(coord(msidata.ssc)[,1:2], ssc_classes) | |
653 colnames(prediction_df) = c("x", "y", "predicted_classes") | |
654 comparison_df = cbind(prediction_df, y_vector) | |
655 comparison_df\$correct<- ifelse(comparison_df\$predicted_classes==comparison_df\$y_vector, T, F) | |
656 correctness = round(sum(comparison_df\$correct)/length(comparison_df\$correct)*100,2) | |
657 | |
658 correctness_plot = ggplot(comparison_df, aes(x=x, y=y, fill=correct))+ | |
659 geom_tile() + | 821 geom_tile() + |
660 coord_fixed()+ | 822 coord_fixed()+ |
661 ggtitle(paste0("Correctness of classification: ",correctness, "%"))+ | 823 ggtitle(paste0("Correctness of classification: ", correctness, " %"))+ |
662 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+ | 824 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+ |
663 theme_bw()+ | 825 theme_bw()+ |
664 theme( | 826 theme( |
665 plot.background = element_blank(), | 827 plot.background = element_blank(), |
666 panel.grid.major = element_blank(), | 828 panel.grid.major = element_blank(), |
667 panel.grid.minor = element_blank())+ | 829 panel.grid.minor = element_blank())+ |
668 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | 830 theme(text=element_text(family="ArialMT", face="bold", size=15))+ |
669 theme(legend.position="bottom",legend.direction="vertical")+ | 831 theme(legend.position="bottom",legend.direction="vertical")+ |
670 guides(fill=guide_legend(ncol=2,byrow=TRUE)) | 832 guides(fill=guide_legend(ncol=2,byrow=TRUE)) |
671 ## coord_labels = aggregate(cbind(x,y)~correct, data=comparison_df, mean, na.rm=TRUE, na.action="na.pass") | 833 coord_labels = aggregate(cbind(x,y)~correct, data=ssc_classes2, mean, na.rm=TRUE, na.action="na.pass") |
672 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) | 834 ##coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$predicted_classes) |
673 print(correctness_plot) | 835 print(correctness_plot) |
836 | |
674 | 837 |
675 ## optional output as .RData | 838 ## optional output as .RData |
676 #if $output_rdata: | 839 #if $output_rdata: |
677 save(msidata.ssc, file="$classification_rdata") | 840 save(msidata.ssc, file="$classification_rdata") |
678 #end if | 841 #end if |
679 | |
680 #end if | 842 #end if |
681 #end if | 843 #end if |
682 | 844 |
683 | 845 |
684 | 846 |
694 print("new response") | 856 print("new response") |
695 | 857 |
696 new_y_tabular = read.delim("$type_cond.new_y_values_cond.new_response_file", header = $type_cond.new_y_values_cond.new_tabular_header, stringsAsFactors = FALSE) | 858 new_y_tabular = read.delim("$type_cond.new_y_values_cond.new_response_file", header = $type_cond.new_y_values_cond.new_tabular_header, stringsAsFactors = FALSE) |
697 new_y_input = new_y_tabular[,c($type_cond.new_y_values_cond.column_new_x, $type_cond.new_y_values_cond.column_new_y, $type_cond.new_y_values_cond.column_new_response)] | 859 new_y_input = new_y_tabular[,c($type_cond.new_y_values_cond.column_new_x, $type_cond.new_y_values_cond.column_new_y, $type_cond.new_y_values_cond.column_new_response)] |
698 colnames(new_y_input)[1:2] = c("x", "y") | 860 colnames(new_y_input)[1:2] = c("x", "y") |
861 | |
699 ## merge with coordinate information of msidata | 862 ## merge with coordinate information of msidata |
700 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) | 863 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) |
701 colnames(msidata_coordinates)[3] = "pixel_index" | 864 colnames(msidata_coordinates)[3] = "pixel_index" |
702 merged_response = merge(msidata_coordinates, new_y_input, by=c("x", "y"), all.x=TRUE) | 865 merged_response = as.data.frame(merge(msidata_coordinates, new_y_input, by=c("x", "y"), all.x=TRUE)) |
703 merged_response[is.na(merged_response)] = "NA" | 866 merged_response[is.na(merged_response)] = "NA" |
704 merged_response = merged_response[order(merged_response\$pixel_index),] | 867 merged_response = merged_response[order(merged_response\$pixel_index),] |
705 new_y_vector = as.factor(merged_response[,4]) | 868 new_y_vector = as.factor(merged_response[,4]) |
706 prediction = predict(training_data,msidata, newy = new_y_vector) | 869 |
707 | 870 prediction = predict(training_data, msidata, newy = new_y_vector) |
871 | |
708 ##numbers of levels for colour selection | 872 ##numbers of levels for colour selection |
709 number_levels = length(levels(new_y_vector)) | 873 number_levels = length(levels(new_y_vector)) |
710 | 874 |
711 ## Summary table prediction | 875 ##new summary table |
712 summary_table = summary(prediction)\$accuracy[[names(prediction@resultData)]] | 876 |
713 summary_table2 = round(as.numeric(summary_table), digits=2) | 877 ##if SSC classification, summary table has more results: |
714 summary_matrix = matrix(summary_table2, nrow=4, ncol=ncol(summary_table)) | 878 #if str($type_cond.classification_type) == "SSC_classifier": |
715 summary_table3 = cbind(rownames(summary_table), summary_matrix) ## include rownames in table | 879 print("SSC classification summary") |
716 summary_table4 = t(summary_table3) | 880 |
717 summary_table5 = cbind(c(names(prediction@resultData),colnames(summary_table)), summary_table4) | 881 summary_df = as.data.frame(summary(prediction)) |
718 plot(0,type='n',axes=FALSE,ann=FALSE) | 882 summary_df = round(summary_df, digits=3) |
719 grid.table(summary_table5, rows= NULL) | 883 colnames(summary_df) = c("Radius r", "Shrinkage s", "Features/Class", "Accuracy", "Sensitivity", "Specificity") |
720 | 884 plot(0,type='n',axes=FALSE,ann=FALSE) |
885 grid.table(summary_df, rows= NULL) | |
886 | |
887 ## else PLS or OPLS classifier: | |
888 #else | |
889 print("PLS/OPLS classifier") | |
890 summary_df = as.data.frame(summary(prediction)) | |
891 colnames(summary_df) = c("Component", "Accuracy", "Sensitivity", "Specificity") | |
892 summary_df = round(summary_df, digits = 2) | |
893 plot(0,type='n',axes=FALSE,ann=FALSE) | |
894 grid.table(summary_df, rows= NULL) | |
895 | |
896 #end if | |
897 | |
898 | |
899 ##else for prediction without a new annotation (no calculation of accuracy): | |
721 #else | 900 #else |
722 prediction = predict(training_data,msidata) | 901 prediction = predict(training_data, msidata) |
723 number_levels = length(levels(training_data\$y[[1]])) | 902 number_levels = length(levels(training_data@resultData@listData[[1]][["class"]])) |
724 #end if | 903 #end if |
725 | 904 |
726 ## colours selection: | 905 ## colours selection: |
727 | 906 |
728 #if str($colour_conditional.colour_type) == "manual_colour" | 907 #if str($colour_conditional.colour_type) == "manual_colour" |
729 #set $color_string = ','.join(['"%s"' % $color.annotation_color for $color in $colour_conditional.colours]) | 908 #set $color_string = ','.join(['"%s"' % $color.annotation_color for $color in $colour_conditional.colours]) |
730 colourvector = c($color_string) | 909 colourvector = c($color_string) |
731 | 910 |
732 #elif str($colour_conditional.colour_type) == "colourpalette" | 911 #elif str($colour_conditional.colour_type) == "colourpalette" |
733 colourvector = noquote($colour_conditional.palettes)(number_levels) | 912 colourvector = noquote($colour_conditional.palettes)(number_levels) |
734 | 913 |
735 #end if | 914 #end if |
736 | 915 |
737 ## m/z and pixel information output | 916 ## m/z and pixel information output |
738 predicted_classes = data.frame(prediction\$classes[[1]]) | 917 predicted_classes = data.frame(prediction@resultData@listData[[1]][["class"]]) |
739 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) | 918 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) |
740 pixel_names = gsub(" = ", "y_", pixel_names) | 919 colnames(msidata_coordinates)[3] = "pixel_index" |
741 x_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,2] | 920 x_coords = msidata_coordinates@listData[["x"]] |
742 y_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,3] | 921 y_coords = msidata_coordinates@listData[["y"]] |
743 predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes) | 922 pixel_names = paste0("xy_", x_coords, "_", y_coords) |
923 predicted_classes2 = data.frame(pixel_names, x_coords, y_coords, predicted_classes) | |
744 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition") | 924 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition") |
745 predicted_toplabels = topFeatures(prediction, n=Inf) | 925 |
746 if (colnames(predicted_toplabels)[4] == "coefficients"){ | 926 ##topFeatures is only available for SSC; for PLS and OPLS, coefficients, loadings, and weights are provided instead |
747 predicted_toplabels[,4:6] <-round(predicted_toplabels[,4:6],5) | 927 |
748 }else{ | 928 #if str($type_cond.classification_type) == "SSC_classifier": |
749 predicted_toplabels[,6:9] <-round(predicted_toplabels[,6:9],5)} | 929 predicted_toplabels = topFeatures(prediction, n=$type_cond.classification_type_cond.predicted_toplabels) |
750 | 930 predicted_toplabels <- as.data.frame(predicted_toplabels) |
931 predicted_toplabels[,6:7] <-round(predicted_toplabels[,6:7], digits = 5) | |
932 write.table(predicted_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
933 | |
934 #else | |
935 ## if PLS or OPLS classifier, coefficients, loadings, and weights instead of topFeatures | |
936 coefficients.df = as.data.frame(prediction@resultData@listData[[1]][["coefficients"]]) | |
937 row_names <- prediction@featureData@mz | |
938 coefficients.df <- cbind("mz" = row_names, coefficients.df) | |
939 write.table(coefficients.df, file = "$coefficients", quote = FALSE, sep = "\t", row.names = FALSE) | |
940 | |
941 ## add loadings and weights table | |
942 loadings.df = as.data.frame(prediction@resultData@listData[[1]][["loadings"]]) | |
943 loadings.df <- cbind("mz" = row_names, loadings.df) | |
944 new_names <- paste0("loadings_", names(loadings.df)[-1]) | |
945 names(loadings.df)[-1] <- new_names | |
946 | |
947 weights.df = as.data.frame(prediction@resultData@listData[[1]][["weights"]]) | |
948 weights.df <- cbind("mz" = row_names, weights.df) | |
949 new_names <- paste0("weights_", names(weights.df)[-1]) | |
950 names(weights.df)[-1] <- new_names | |
951 | |
952 ## combine loading and weights table | |
953 merged.load.wei = merge(loadings.df, weights.df, by = "mz") | |
954 write.table(merged.load.wei, file = "$loadings_weights", quote = FALSE, sep = "\t", row.names = FALSE) | |
955 | |
956 #end if | |
957 | |
751 ##predicted classes | 958 ##predicted classes |
752 prediction_df = cbind(coord(prediction)[,1:2], predicted_classes) | 959 prediction_df = as.data.frame(cbind(coord(prediction)[,1:2], predicted_classes)) |
753 colnames(prediction_df) = c("x", "y", "predicted_classes") | 960 colnames(prediction_df) = c("x", "y", "predicted_classes") |
754 | 961 |
755 #if str($type_cond.classification_type) == "SSC_classifier": | 962 #if str($type_cond.classification_type) == "SSC_classifier": |
756 ## this seems to work only for SSC, therefore overwrite tables | 963 ## this seems to work only for SSC, therefore overwrite tables |
757 predicted_probabilities = data.frame(prediction\$probabilities[[1]]) | 964 predicted_probabilities = data.frame(prediction@resultData@listData[[1]][["probability"]]) |
758 predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes, predicted_probabilities) | 965 predicted_classes2 = data.frame(pixel_names, x_coords, y_coords, predicted_classes, predicted_probabilities) |
759 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition", levels(prediction\$classes[[1]])) | 966 colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition", levels(prediction@resultData@listData[[1]][["class"]])) |
760 ## also image modes are specific to SSC | 967 ## also image modes are specific to SSC |
761 image(prediction, mode="classes", layout=c(1,1), main="Class", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector) | 968 print(predicted_classes2[1:5,]) |
762 image(prediction, mode="probabilities", layout=c(1,1), main="Class probabilities",ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector) | 969 print(image(prediction, values="class", layout=c(1,1), main="Class Prediction", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector)) |
763 | 970 print(image(prediction, values="probability", layout=c(1,1), main="Class Probabilities",ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), col=colourvector)) |
764 #else | 971 |
765 | 972 #else |
766 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ | 973 prediction_plot = ggplot(prediction_df, aes(x=x, y=y, fill=predicted_classes))+ |
767 geom_tile()+ | 974 geom_tile()+ |
768 coord_fixed()+ | 975 coord_fixed()+ |
769 ggtitle("Predicted condition for each spectrum")+ | 976 ggtitle("Predicted condition for each spectrum")+ |
770 theme_bw()+ | 977 theme_bw()+ |
771 theme( | 978 theme( |
775 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | 982 theme(text=element_text(family="ArialMT", face="bold", size=15))+ |
776 theme(legend.position="bottom", legend.direction="vertical")+ | 983 theme(legend.position="bottom", legend.direction="vertical")+ |
777 guides(fill=guide_legend(ncol=4, byrow=TRUE))+ | 984 guides(fill=guide_legend(ncol=4, byrow=TRUE))+ |
778 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector) | 985 scale_discrete_manual(aesthetics = c("colour", "fill"), values = colourvector) |
779 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") | 986 coord_labels = aggregate(cbind(x,y)~predicted_classes, data=prediction_df, mean, na.rm=TRUE, na.action="na.pass") |
780 coord_labels\$file_number = gsub( "_.*§", "", coord_labels\$predicted_classes) | 987 ##coord_labels\$file_number = gsub( "_.*§", "", coord_labels\$predicted_classes) |
781 print(prediction_plot) | 988 print(prediction_plot) |
782 #end if | 989 #end if |
783 | 990 |
784 write.table(predicted_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
785 write.table(predicted_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
786 | |
787 | |
788 | |
789 | |
790 #if str($type_cond.new_y_values_cond.new_y_values) == "new_response": | 991 #if str($type_cond.new_y_values_cond.new_y_values) == "new_response": |
791 ## image with right and wrong classes: | 992 ## image with right and wrong classes: |
792 | 993 comparison_df = as.data.frame(cbind(prediction_df, new_y_vector)) |
793 comparison_df = cbind(prediction_df, new_y_vector) | 994 colnames(comparison_df) = c("x", "y", "predicted_class", "annotated_class") |
794 comparison_df\$correct<- ifelse(comparison_df\$predicted_classes==comparison_df\$new_y_vector, T, F) | 995 comparison_df\$predicted_class = ifelse(is.na(comparison_df\$predicted_class), "NA", as.character(comparison_df\$predicted_class)) |
795 correctness = round(sum(comparison_df\$correct)/length(comparison_df\$correct)*100,2) | 996 comparison_df\$correct <- ifelse(comparison_df\$predicted_class==comparison_df\$annotated_class, T, F) |
796 | 997 correctness = round(sum(comparison_df\$correct, na.rm = TRUE)/length(comparison_df\$correct)*100,2) |
797 correctness_plot = ggplot(comparison_df, aes(x=x, y=y, fill=correct))+ | 998 |
999 correctness_plot = ggplot(comparison_df, aes(x=x, y=y, fill=correct))+ | |
798 geom_tile()+ | 1000 geom_tile()+ |
799 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+ | 1001 scale_fill_manual(values = c("TRUE" = "orange","FALSE" = "darkblue"))+ |
800 coord_fixed()+ | 1002 coord_fixed()+ |
801 ggtitle(paste0("Correctness of classification: ",correctness, "%"))+ | 1003 ggtitle(paste0("Correctness of classification: ", correctness, " %"))+ |
802 theme_bw()+ | 1004 theme_bw()+ |
803 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | 1005 theme(text=element_text(family="ArialMT", face="bold", size=15))+ |
804 theme(legend.position="bottom",legend.direction="vertical")+ | 1006 theme(legend.position="bottom",legend.direction="vertical")+ |
805 guides(fill=guide_legend(ncol=2,byrow=TRUE)) | 1007 guides(fill=guide_legend(ncol=2,byrow=TRUE)) |
806 print(correctness_plot) | 1008 print(correctness_plot) |
807 #end if | 1009 #end if |
1010 | |
1011 ## pixel output | |
1012 #if str($type_cond.new_y_values_cond.new_y_values) == "new_response": | |
1013 print("new response output") | |
1014 write.table(comparison_df, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
1015 | |
1016 #else | |
1017 write.table(predicted_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
1018 | |
1019 #end if | |
808 | 1020 |
809 ## optional output as .RData | 1021 ## optional output as .RData |
810 #if $output_rdata: | 1022 #if $output_rdata: |
811 msidata = prediction | 1023 msidata = prediction |
812 save(msidata, file="$classification_rdata") | 1024 save(msidata, file="$classification_rdata") |
815 #end if | 1027 #end if |
816 | 1028 |
817 dev.off() | 1029 dev.off() |
818 | 1030 |
819 }else{ | 1031 }else{ |
1032 plot.new() | |
1033 text(0.5, 0.5, "Inputfile has no intensities > 0 \n or contains NA values.", cex = 1.5) | |
820 print("Inputfile has no intensities > 0 or contains NA values") | 1034 print("Inputfile has no intensities > 0 or contains NA values") |
821 dev.off() | 1035 dev.off() |
822 } | 1036 } |
823 | 1037 |
824 | 1038 |
862 </when> | 1076 </when> |
863 <when value="PLS_analysis"> | 1077 <when value="PLS_analysis"> |
864 <param name="pls_comp" type="integer" value="5" | 1078 <param name="pls_comp" type="integer" value="5" |
865 label="The optimal number of PLS-DA components as indicated by cross-validations (minimum is 1)" help="Run cvApply first to obtain optimal number of PLS-DA components"/> | 1079 label="The optimal number of PLS-DA components as indicated by cross-validations (minimum is 1)" help="Run cvApply first to obtain optimal number of PLS-DA components"/> |
866 <param name="pls_scale" type="boolean" label="Data scaling" truevalue="TRUE" falsevalue="FALSE"/> | 1080 <param name="pls_scale" type="boolean" label="Data scaling" truevalue="TRUE" falsevalue="FALSE"/> |
867 <param name="pls_toplabels" type="integer" value="100" | 1081 <param name="PLS_Yweights" type="boolean" label="Y weights" help="Y weights represent the coefficients associated with the response variables and are used to model the relationship between predictors and responses in the context of classification. They represent the importance of each response variable in predicting each component. They can be useful if you have multiple response variables."/> |
868 label="Number of toplabels (m/z features) which should be written in tabular output"/> | 1082 <!--param name="pls_toplabels" type="integer" value="100 |
1083 label="Number of toplabels (m/z features) which should be written in tabular output"/--> | |
869 </when> | 1084 </when> |
870 </conditional> | 1085 </conditional> |
871 </when> | 1086 </when> |
872 | 1087 |
873 <when value="OPLS"> | 1088 <when value="OPLS"> |
889 <when value="opls_analysis"> | 1104 <when value="opls_analysis"> |
890 <param name="opls_comp" type="integer" value="5" | 1105 <param name="opls_comp" type="integer" value="5" |
891 label="The optimal number of OPLS-DA components as indicated by cross-validations (minimum is 1)" help="Run cvApply first to obtain optimal number of OPLS-DA components"/> | 1106 label="The optimal number of OPLS-DA components as indicated by cross-validations (minimum is 1)" help="Run cvApply first to obtain optimal number of OPLS-DA components"/> |
892 <!--param name="xnew" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/--> | 1107 <!--param name="xnew" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/--> |
893 <param name="opls_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Data scaling"/> | 1108 <param name="opls_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Data scaling"/> |
1109 <param name="OPLS_Yweights" type="boolean" label="Y weights" help="Y weights represent the coefficients associated with the response variables and are used to model the relationship between predictors and responses in the context of classification. They represent the importance of each response variable in predicting each component. They can be useful if you have multiple response variables."/> | |
894 <!--param name="opls_toplabels" type="integer" value="100" | 1110 <!--param name="opls_toplabels" type="integer" value="100" |
895 label="Number of toplabels (m/z features) which should be written in tabular output"/--> | 1111 label="Number of toplabels (m/z features) which should be written in tabular output"/--> |
896 </when> | 1112 </when> |
897 </conditional> | 1113 </conditional> |
898 </when> | 1114 </when> |
903 <option value="ssc_cvapply" selected="True">cvApply</option> | 1119 <option value="ssc_cvapply" selected="True">cvApply</option> |
904 <option value="ssc_analysis">spatial shrunken centroids analysis</option> | 1120 <option value="ssc_analysis">spatial shrunken centroids analysis</option> |
905 </param> | 1121 </param> |
906 <when value="ssc_cvapply"> | 1122 <when value="ssc_cvapply"> |
907 <param name="write_best_params" type="boolean" label="Write out best r and s values" help="Can be used to generate automatic classification workflow"/> | 1123 <param name="write_best_params" type="boolean" label="Write out best r and s values" help="Can be used to generate automatic classification workflow"/> |
1124 <param name="ssc_cv_accuracy_plot" type="boolean" label="Plot CV accuracy plots on one page (=Yes) or individual pages (=No)"/> | |
908 </when> | 1125 </when> |
909 <when value="ssc_analysis"> | 1126 <when value="ssc_analysis"> |
910 <!--param name="ssc_toplabels" type="integer" value="100" | 1127 <param name="ssc_toplabels" type="integer" value="100" |
911 label="Number of toplabels (m/z features) which should be written in tabular output"/--> | 1128 label="Number of toplabels (m/z features) which should be written in tabular output"/> |
912 </when> | 1129 </when> |
913 </conditional> | 1130 </conditional> |
914 <param name="ssc_r" type="text" value="2" | 1131 <param name="ssc_r" type="text" value="2" |
915 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="For cvapply multiple values are allowed (e.g. 0,1,2,3 or 2:5)"> | 1132 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="For cvapply multiple values are allowed (e.g. 0,1,2,3 or 2:5)"> |
916 <expand macro="sanitizer_multiple_digits"/> | 1133 <expand macro="sanitizer_multiple_digits"/> |
920 <expand macro="sanitizer_multiple_digits"/> | 1137 <expand macro="sanitizer_multiple_digits"/> |
921 </param> | 1138 </param> |
922 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights"> | 1139 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights"> |
923 <option value="gaussian">gaussian</option> | 1140 <option value="gaussian">gaussian</option> |
924 <option value="adaptive" selected="True">adaptive</option> | 1141 <option value="adaptive" selected="True">adaptive</option> |
925 </param> | 1142 </param> |
926 </when> | 1143 </when> |
927 </conditional> | 1144 </conditional> |
928 | 1145 |
929 </when> | 1146 </when> |
930 | 1147 |
931 <when value="prediction"> | 1148 <when value="prediction"> |
932 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/> | 1149 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/> |
933 <!--param name="predicted_toplabels" type="integer" value="100" | 1150 <conditional name="classification_type_cond"> |
934 label="Number of toplabels (m/z features) which should be written in tabular output"/--> | 1151 <param name="classification_type" type="select" label="Which classification method was used"> |
935 <param name="classification_type" type="select" display="radio" optional="False" label="Which classification method was used"> | 1152 <option value="PLS_classifier" selected="True" >PLS classifier</option> |
936 <option value="PLS_classifier" selected="True" >PLS classifier</option> | 1153 <option value="OPLS_classifier">OPLS classifier</option> |
937 <option value="OPLS_classifier">OPLS classifier</option> | 1154 <option value="SSC_classifier">SSC classifier</option> |
938 <option value="SSC_classifier">SSC_classifier</option> | 1155 </param> |
939 </param> | 1156 <when value="PLS_classifier"/> |
1157 <when value="OPLS_classifier"/> | |
1158 <when value="SSC_classifier"> | |
1159 <param name="predicted_toplabels" type="integer" value="100" | |
1160 label="Number of toplabels (m/z features) which should be written in tabular output"/> | |
1161 </when> | |
1162 </conditional> | |
940 <conditional name="new_y_values_cond"> | 1163 <conditional name="new_y_values_cond"> |
941 <param name="new_y_values" type="select" label="Load annotations (optional, but allows accuracy calculations)"> | 1164 <param name="new_y_values" type="select" label="Load annotations (optional, but allows accuracy calculations)"> |
942 <option value="no_new_response" selected="True">no</option> | 1165 <option value="no_new_response" selected="True">no</option> |
943 <option value="new_response">use annotations</option> | 1166 <option value="new_response">use annotations</option> |
944 </param> | 1167 </param> |
982 </conditional> | 1205 </conditional> |
983 <param name="output_rdata" type="boolean" label="Results as .RData output" help="Can be used to generate a classification prediction on new data"/> | 1206 <param name="output_rdata" type="boolean" label="Results as .RData output" help="Can be used to generate a classification prediction on new data"/> |
984 </inputs> | 1207 </inputs> |
985 <outputs> | 1208 <outputs> |
986 <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label = "${tool.name} on ${on_string}: results"/> | 1209 <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label = "${tool.name} on ${on_string}: results"/> |
987 <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features"/> | 1210 <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features"> |
988 <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels"/> | 1211 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_analysis' or type_cond['type_method'] == 'prediction' and type_cond['classification_type_cond']['classification_type'] == 'SSC_classifier'</filter> |
1212 </data> | |
1213 <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels"> | |
1214 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'PLS' and type_cond['method_cond']['analysis_cond']['PLS_method'] == 'PLS_analysis' or type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'OPLS' and type_cond['method_cond']['opls_analysis_cond']['opls_method'] == 'opls_analysis' or type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_analysis' or type_cond['type_method'] == 'prediction'</filter> | |
1215 </data> | |
1216 <data format="tabular" name="coefficients" label="${tool.name} on ${on_string}: coefficients"> | |
1217 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'PLS' and type_cond['method_cond']['analysis_cond']['PLS_method'] == 'PLS_analysis' or type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'OPLS' and type_cond['method_cond']['opls_analysis_cond']['opls_method'] == 'opls_analysis' or type_cond['type_method'] == 'prediction' and type_cond['classification_type_cond']['classification_type'] == 'PLS_classifier' or type_cond['type_method'] == 'prediction' and type_cond['classification_type_cond']['classification_type'] == 'OPLS_classifier'</filter> | |
1218 </data> | |
1219 <data format="tabular" name="loadings_weights" label="${tool.name} on ${on_string}: loadings and weights"> | |
1220 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'PLS' and type_cond['method_cond']['analysis_cond']['PLS_method'] == 'PLS_analysis' or type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'OPLS' and type_cond['method_cond']['opls_analysis_cond']['opls_method'] == 'opls_analysis' or type_cond['type_method'] == 'prediction' and type_cond['classification_type_cond']['classification_type'] == 'PLS_classifier' or type_cond['type_method'] == 'prediction' and type_cond['classification_type_cond']['classification_type'] == 'OPLS_classifier'</filter> | |
1221 </data> | |
989 <data format="txt" name="best_r" label="${tool.name} on ${on_string}:best r"> | 1222 <data format="txt" name="best_r" label="${tool.name} on ${on_string}:best r"> |
990 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_cvapply' and type_cond['method_cond']['ssc_analysis_cond']['write_best_params']</filter> | 1223 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_cvapply' and type_cond['method_cond']['ssc_analysis_cond']['write_best_params']</filter> |
991 </data> | 1224 </data> |
992 <data format="txt" name="best_s" label="${tool.name} on ${on_string}:best s"> | 1225 <data format="txt" name="best_s" label="${tool.name} on ${on_string}:best s"> |
993 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_cvapply' and type_cond['method_cond']['ssc_analysis_cond']['write_best_params']</filter> | 1226 <filter>type_cond['type_method'] == 'training' and type_cond['method_cond']['class_method'] == 'spatialShrunkenCentroids' and type_cond['method_cond']['ssc_analysis_cond']['ssc_method'] == 'ssc_cvapply' and type_cond['method_cond']['ssc_analysis_cond']['write_best_params']</filter> |
994 </data> | 1227 </data> |
995 <data format="rdata" name="classification_rdata" label="${tool.name} on ${on_string}: results.RData"> | 1228 <data format="rdata" name="classification_rdata" label="${tool.name} on ${on_string}: results.RData"> |
996 <filter>output_rdata</filter> | 1229 <filter>output_rdata</filter> |
997 </data> | 1230 </data> |
998 </outputs> | 1231 </outputs> |
999 <tests> | 1232 <tests> |
1000 <test expect_num_outputs="3"> | 1233 <test expect_num_outputs="1"> |
1001 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | 1234 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> |
1002 <conditional name="type_cond"> | 1235 <conditional name="type_cond"> |
1003 <param name="type_method" value="training"/> | 1236 <param name="type_method" value="training"/> |
1004 <param name="annotation_file" value= "pixel_annotation_file1.tabular" ftype="tabular"/> | 1237 <param name="annotation_file" value= "pixel_annotation_file1.tabular" ftype="tabular"/> |
1005 <param name="column_x" value="1"/> | 1238 <param name="column_x" value="1"/> |
1013 <param name="PLS_method" value="cvapply"/> | 1246 <param name="PLS_method" value="cvapply"/> |
1014 <param name="plscv_comp" value="2:4"/> | 1247 <param name="plscv_comp" value="2:4"/> |
1015 </conditional> | 1248 </conditional> |
1016 </conditional> | 1249 </conditional> |
1017 </conditional> | 1250 </conditional> |
1018 <output name="mzfeatures" file="features_test1.tabular"/> | |
1019 <output name="pixeloutput" file="pixels_test1.tabular"/> | |
1020 <output name="classification_images" file="test1.pdf" compare="sim_size" delta="2000"/> | 1251 <output name="classification_images" file="test1.pdf" compare="sim_size" delta="2000"/> |
1021 </test> | 1252 </test> |
1022 | 1253 |
1023 <test expect_num_outputs="4"> | 1254 <test expect_num_outputs="5"> |
1024 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | 1255 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> |
1025 <conditional name="type_cond"> | 1256 <conditional name="type_cond"> |
1026 <param name="type_method" value="training"/> | 1257 <param name="type_method" value="training"/> |
1027 <param name="annotation_file" value= "pixel_annotation_file1.tabular" ftype="tabular"/> | 1258 <param name="annotation_file" value= "pixel_annotation_file1.tabular" ftype="tabular"/> |
1028 <param name="column_x" value="1"/> | 1259 <param name="column_x" value="1"/> |
1033 <param name="class_method" value="PLS"/> | 1264 <param name="class_method" value="PLS"/> |
1034 <conditional name="analysis_cond"> | 1265 <conditional name="analysis_cond"> |
1035 <param name="PLS_method" value="PLS_analysis"/> | 1266 <param name="PLS_method" value="PLS_analysis"/> |
1036 <param name="pls_comp" value="2"/> | 1267 <param name="pls_comp" value="2"/> |
1037 <param name="pls_scale" value="TRUE"/> | 1268 <param name="pls_scale" value="TRUE"/> |
1269 <param name="PLS_Yweights" value="TRUE"/> | |
1038 <!--param name="pls_toplabels" value="100"/--> | 1270 <!--param name="pls_toplabels" value="100"/--> |
1039 </conditional> | 1271 </conditional> |
1040 </conditional> | 1272 </conditional> |
1041 </conditional> | 1273 </conditional> |
1042 <param name="output_rdata" value="True"/> | 1274 <param name="output_rdata" value="True"/> |
1043 <output name="mzfeatures" file="features_test2.tabular"/> | 1275 <output name="coefficients"> |
1276 <assert_contents> | |
1277 <has_text text="900.004699707031"/> | |
1278 <has_text text="962.870727539062"/> | |
1279 <has_text text="999.606872558594"/> | |
1280 </assert_contents> | |
1281 </output> | |
1282 <output name="loadings_weights"> | |
1283 <assert_contents> | |
1284 <has_text text="900.076354980469"/> | |
1285 <has_text text="950.495910644531"/> | |
1286 <has_text text="989.024536132812"/> | |
1287 </assert_contents> | |
1288 </output> | |
1044 <output name="pixeloutput" file="pixels_test2.tabular"/> | 1289 <output name="pixeloutput" file="pixels_test2.tabular"/> |
1045 <output name="classification_images" file="test2.pdf" compare="sim_size"/> | 1290 <output name="classification_images" file="test2.pdf" compare="sim_size"/> |
1046 <output name="classification_rdata" file="test2.rdata" compare="sim_size"/> | 1291 <output name="classification_rdata" file="test2.rdata" compare="sim_size"/> |
1047 </test> | 1292 </test> |
1048 | 1293 |
1049 <test expect_num_outputs="3"> | 1294 <test expect_num_outputs="1"> |
1050 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | 1295 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> |
1051 <conditional name="type_cond"> | 1296 <conditional name="type_cond"> |
1052 <param name="type_method" value="training"/> | 1297 <param name="type_method" value="training"/> |
1053 <param name="annotation_file" value= "random_factors.tabular" ftype="tabular"/> | 1298 <param name="annotation_file" value= "random_factors.tabular" ftype="tabular"/> |
1054 <param name="column_x" value="1"/> | 1299 <param name="column_x" value="1"/> |
1059 <conditional name="method_cond"> | 1304 <conditional name="method_cond"> |
1060 <param name="class_method" value="OPLS"/> | 1305 <param name="class_method" value="OPLS"/> |
1061 <conditional name="opls_analysis_cond"> | 1306 <conditional name="opls_analysis_cond"> |
1062 <param name="opls_method" value="opls_cvapply"/> | 1307 <param name="opls_method" value="opls_cvapply"/> |
1063 <param name="opls_cvcomp" value="1:2"/> | 1308 <param name="opls_cvcomp" value="1:2"/> |
1064 <param name="xnew_cv" value="FALSE"/> | |
1065 </conditional> | 1309 </conditional> |
1066 </conditional> | 1310 </conditional> |
1067 </conditional> | 1311 </conditional> |
1068 <output name="mzfeatures" file="features_test3.tabular"/> | |
1069 <output name="pixeloutput" file="pixels_test3.tabular"/> | |
1070 <output name="classification_images" file="test3.pdf" compare="sim_size"/> | 1312 <output name="classification_images" file="test3.pdf" compare="sim_size"/> |
1071 </test> | 1313 </test> |
1072 | 1314 |
1073 <test expect_num_outputs="4"> | 1315 <test expect_num_outputs="5"> |
1074 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | 1316 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> |
1075 <conditional name="type_cond"> | 1317 <conditional name="type_cond"> |
1076 <param name="type_method" value="training"/> | 1318 <param name="type_method" value="training"/> |
1077 <param name="annotation_file" value= "random_factors.tabular" ftype="tabular"/> | 1319 <param name="annotation_file" value= "random_factors.tabular" ftype="tabular"/> |
1078 <param name="column_x" value="1"/> | 1320 <param name="column_x" value="1"/> |
1082 <conditional name="method_cond"> | 1324 <conditional name="method_cond"> |
1083 <param name="class_method" value="OPLS"/> | 1325 <param name="class_method" value="OPLS"/> |
1084 <conditional name="opls_analysis_cond"> | 1326 <conditional name="opls_analysis_cond"> |
1085 <param name="opls_method" value="opls_analysis"/> | 1327 <param name="opls_method" value="opls_analysis"/> |
1086 <param name="opls_comp" value="3"/> | 1328 <param name="opls_comp" value="3"/> |
1087 <param name="xnew" value="FALSE"/> | |
1088 <param name="opls_scale" value="FALSE"/> | 1329 <param name="opls_scale" value="FALSE"/> |
1089 <!--param name="opls_toplabels" value="100"/--> | 1330 <param name="PLS_Yweights" value="FALSE"/> |
1090 </conditional> | 1331 </conditional> |
1091 </conditional> | 1332 </conditional> |
1092 </conditional> | 1333 </conditional> |
1093 <param name="output_rdata" value="True"/> | 1334 <param name="output_rdata" value="True"/> |
1094 <output name="mzfeatures" file="features_test4.tabular"/> | |
1095 <output name="pixeloutput" file="pixels_test4.tabular"/> | 1335 <output name="pixeloutput" file="pixels_test4.tabular"/> |
1336 <output name="coefficients"> | |
1337 <assert_contents> | |
1338 <has_text text="900.148010253906"/> | |
1339 <has_text text="974.132446289062"/> | |
1340 <has_text text="999.908935546875"/> | |
1341 </assert_contents> | |
1342 </output> | |
1343 <output name="loadings_weights"> | |
1344 <assert_contents> | |
1345 <has_text text="901.581848144531"/> | |
1346 <has_text text="939.189086914062"/> | |
1347 <has_text text="984.185363769531"/> | |
1348 </assert_contents> | |
1349 </output> | |
1096 <output name="classification_images" file="test4.pdf" compare="sim_size"/> | 1350 <output name="classification_images" file="test4.pdf" compare="sim_size"/> |
1097 <output name="classification_rdata" file="test4.rdata" compare="sim_size"/> | 1351 <output name="classification_rdata" file="test4.rdata" compare="sim_size"/> |
1098 </test> | 1352 </test> |
1099 | 1353 |
1100 <test expect_num_outputs="3"> | 1354 <test expect_num_outputs="3"> |
1112 <conditional name="ssc_analysis_cond"> | 1366 <conditional name="ssc_analysis_cond"> |
1113 <param name="ssc_method" value="ssc_cvapply"/> | 1367 <param name="ssc_method" value="ssc_cvapply"/> |
1114 <param name="ssc_r" value="1:2"/> | 1368 <param name="ssc_r" value="1:2"/> |
1115 <param name="ssc_s" value="2:3"/> | 1369 <param name="ssc_s" value="2:3"/> |
1116 <param name="ssc_kernel_method" value="adaptive"/> | 1370 <param name="ssc_kernel_method" value="adaptive"/> |
1371 <param name="write_best_params" value="TRUE"/> | |
1117 </conditional> | 1372 </conditional> |
1118 </conditional> | 1373 </conditional> |
1119 </conditional> | 1374 </conditional> |
1120 <output name="mzfeatures" file="features_test5.tabular"/> | |
1121 <output name="pixeloutput" file="pixels_test5.tabular"/> | |
1122 <output name="classification_images" file="test5.pdf" compare="sim_size"/> | 1375 <output name="classification_images" file="test5.pdf" compare="sim_size"/> |
1376 <output name="best_r" file="best_r_test5.txt"/> | |
1377 <output name="best_s" file="best_s_test5.txt"/> | |
1123 </test> | 1378 </test> |
1124 | 1379 |
1125 <test expect_num_outputs="4"> | 1380 <test expect_num_outputs="4"> |
1126 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | 1381 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> |
1127 <conditional name="type_cond"> | 1382 <conditional name="type_cond"> |
1132 <param name="column_response" value="4"/> | 1387 <param name="column_response" value="4"/> |
1133 <conditional name="method_cond"> | 1388 <conditional name="method_cond"> |
1134 <param name="class_method" value="spatialShrunkenCentroids"/> | 1389 <param name="class_method" value="spatialShrunkenCentroids"/> |
1135 <conditional name="ssc_analysis_cond"> | 1390 <conditional name="ssc_analysis_cond"> |
1136 <param name="ssc_method" value="ssc_analysis"/> | 1391 <param name="ssc_method" value="ssc_analysis"/> |
1137 <!--param name="ssc_toplabels" value="20"/--> | 1392 <param name="ssc_toplabels" value="20"/> |
1138 </conditional> | 1393 </conditional> |
1139 <param name="ssc_r" value="2"/> | 1394 <param name="ssc_r" value="2"/> |
1140 <param name="ssc_s" value="2"/> | 1395 <param name="ssc_s" value="2"/> |
1141 <param name="ssc_kernel_method" value="adaptive"/> | 1396 <param name="ssc_kernel_method" value="adaptive"/> |
1142 </conditional> | 1397 </conditional> |
1146 <output name="pixeloutput" file="pixels_test6.tabular"/> | 1401 <output name="pixeloutput" file="pixels_test6.tabular"/> |
1147 <output name="classification_images" file="test6.pdf" compare="sim_size"/> | 1402 <output name="classification_images" file="test6.pdf" compare="sim_size"/> |
1148 <output name="classification_rdata" file="test6.rdata" compare="sim_size" delta="15000"/> | 1403 <output name="classification_rdata" file="test6.rdata" compare="sim_size" delta="15000"/> |
1149 </test> | 1404 </test> |
1150 | 1405 |
1151 <test expect_num_outputs="4"> | 1406 <test expect_num_outputs="5"> |
1152 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | 1407 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> |
1153 <conditional name="type_cond"> | 1408 <conditional name="type_cond"> |
1154 <param name="type_method" value="prediction"/> | 1409 <param name="type_method" value="prediction"/> |
1410 <param name="type_method" value="prediction"/> | |
1155 <param name="training_result" value="test2.rdata" ftype="rdata"/> | 1411 <param name="training_result" value="test2.rdata" ftype="rdata"/> |
1412 <param name="classification_type" value="PLS_classifier"/> | |
1156 <conditional name="new_y_values_cond"> | 1413 <conditional name="new_y_values_cond"> |
1157 <param name="new_y_values" value="new_response"/> | 1414 <param name="new_y_values" value="new_response"/> |
1158 <param name="new_response_file" value="pixel_annotation_file1.tabular" ftype="tabular"/> | 1415 <param name="new_response_file" value="pixel_annotation_file1.tabular" ftype="tabular"/> |
1159 <param name="column_new_x" value="1"/> | 1416 <param name="column_new_x" value="1"/> |
1160 <param name="column_new_y" value="2"/> | 1417 <param name="column_new_y" value="2"/> |
1161 <param name="column_new_response" value="4"/> | 1418 <param name="column_new_response" value="4"/> |
1162 <param name="new_tabular_header" value="False"/> | 1419 <param name="new_tabular_header" value="False"/> |
1163 </conditional> | 1420 </conditional> |
1164 </conditional> | 1421 </conditional> |
1165 <param name="output_rdata" value="True"/> | 1422 <param name="output_rdata" value="True"/> |
1166 <output name="mzfeatures" file="features_test7.tabular"/> | 1423 <output name="coefficients" file="coefficients_test7.tabular"/> |
1424 <output name="loadings_weights" file="loadings_and_weights_test7.tabular"/> | |
1167 <output name="pixeloutput" file="pixels_test7.tabular"/> | 1425 <output name="pixeloutput" file="pixels_test7.tabular"/> |
1168 <output name="classification_images" file="test7.pdf" compare="sim_size"/> | 1426 <output name="classification_images" file="test7.pdf" compare="sim_size"/> |
1169 <output name="classification_rdata" file="test7.rdata" compare="sim_size" /> | 1427 <output name="classification_rdata" file="test7.rdata" compare="sim_size" /> |
1170 </test> | 1428 </test> |
1171 </tests> | 1429 </tests> |
1221 | 1479 |
1222 | 1480 |
1223 **Tips** | 1481 **Tips** |
1224 | 1482 |
1225 - The classification function will only run on files with valid intensity values (NA values are not allowed) | 1483 - The classification function will only run on files with valid intensity values (NA values are not allowed) |
1226 - Only a single input file is accepted; several files have to be combined beforehand, for example with the MSI combine tool. | 1484 - Only a single input file is accepted; several files have to be combined beforehand, for example with the MSI combine tool. |
1227 | 1485 |
1228 | 1486 |
1229 **Output** | 1487 **Output** |
1230 | 1488 |
1231 - PDF with the heatmaps and plots for the classification | 1489 - PDF with the heatmaps and plots for the classification |