Mercurial > repos > galaxyp > cardinal_segmentations
diff segmentation.xml @ 17:91f0f5922011 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 91e77c139cb3b7c6d67727dc39140dd79355fa0c
author | galaxyp |
---|---|
date | Thu, 04 Jul 2024 13:36:52 +0000 |
parents | 050bcc806da2 |
children |
line wrap: on
line diff
--- a/segmentation.xml Wed Apr 19 22:47:48 2023 +0000 +++ b/segmentation.xml Thu Jul 04 13:36:52 2024 +0000 @@ -1,11 +1,9 @@ -<tool id="cardinal_segmentations" name="MSI segmentation" version="@VERSION@.0"> +<tool id="cardinal_segmentations" name="MSI segmentation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05"> <description>mass spectrometry imaging spatial clustering</description> <macros> <import>macros.xml</import> </macros> - <expand macro="requirements"> - <requirement type="package" version="2.3">r-gridextra</requirement> - </expand> + <expand macro="requirements"/> <command detect_errors="exit_code"> <![CDATA[ @@ -22,10 +20,12 @@ library(Cardinal) library(gridExtra) +library(ggplot2) +library(scales) @READING_MSIDATA@ - msidata = as(msidata, "MSImageSet") ##coercion to MSImageSet + msidata = as(msidata, "MSImagingExperiment") ## remove duplicated coordinates msidata <- msidata[,!duplicated(coord(msidata))] @@ -49,104 +49,74 @@ ############################################################################# grid.table(property_df, rows= NULL) -if (npeaks > 0 && sum(is.na(spectra(msidata)))==0) + +if (npeaks > 0 && NAcount==0) { ######################## II) segmentation tools ############################# ############################################################################# - #set $color_string = ','.join(['"%s"' % $color.feature_color for $color in $colours]) - colourvector = c($color_string) + + #if str( $segm_cond.segmentationtool ) == 'kmeans': + number_colors = max(c($segm_cond.kmeans_k)) + + #elif str( $segm_cond.segmentationtool ) == 'centroids': + number_colors = max(c($segm_cond.centroids_k)) + + #end if + + #if str($colour_conditional.colour_type) == "manual_colour" + #set $color_string = ','.join(['"%s"' % $color.annotation_color for $color in $colour_conditional.colours]) + colourvector = c($color_string) + + #elif str($colour_conditional.colour_type) == "colourpalette" + number_levels = (number_colors) + colourvector = noquote($colour_conditional.palettes)(number_levels) + + #end if + ## set seed to make analysis reproducible set.seed($setseed) - #if str( $segm_cond.segmentationtool ) == 'pca': - print('pca') - ##pca - - component_vector = character() - for (numberofcomponents in 1:$segm_cond.pca_ncomp) - {component_vector[numberofcomponents]= paste0("PC", numberofcomponents)} - - pca_result = PCA(msidata, ncomp=$segm_cond.pca_ncomp, column = component_vector, superpose = FALSE, - method = "$segm_cond.pca_method", scale = $segm_cond.pca_scale, layout = c(ncomp, 1)) + #if str( $segm_cond.segmentationtool ) == 'kmeans': + print('kmeans') + ##k-means + skm = spatialKMeans(msidata, r=c($segm_cond.kmeans_r), k=c($segm_cond.kmeans_k), method="gaussian") ## remove msidata to clean up RAM space rm(msidata) gc() - ### table in pdf file - plot(0,type='n',axes=FALSE,ann=FALSE) - sd_table = as.data.frame(round(pca_result@resultData\$ncomp\$sdev, digits=2)) - colnames(sd_table) = "Standard deviation" - PC_vector = character() - for (PCs in 1:$segm_cond.pca_ncomp){ - PC_vector[[PCs]] = c(paste0("PC",PCs))} - sd_table = cbind(PC_vector, sd_table) - colnames(sd_table)[1] = "Principal components" - grid.table(sd_table, rows=NULL) - ### images in pdf file - print(image(pca_result, main="PCA image", strip = FALSE, col=colourvector, ylim=c(maximumy+2, minimumy-2))) - for (PCs in 1:$segm_cond.pca_ncomp){ - print(image(pca_result, column = c(paste0("PC",PCs)),strip = FALSE, superpose = FALSE, main=paste0("PC", PCs), col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))} - ### plots in pdf file - print(plot(pca_result, main="PCA plot", col= colourvector, strip = FALSE)) - for (PCs in 1:$segm_cond.pca_ncomp){ - print(plot(pca_result, column = c(paste0("PC",PCs)),main=paste0("PC", PCs),strip = FALSE,superpose = FALSE))} - - ### values in tabular files - pcaloadings = formatC(pca_result@resultData\$ncomp\$loadings, format = "e", digits = 6)### loading for each m/z value - pcaloadings2 = cbind(matrix(unlist(strsplit(rownames(pcaloadings), " = ")), ncol=2, byrow=TRUE)[,2], pcaloadings) - colnames(pcaloadings2) = c("mz", colnames(pcaloadings)) - pcascores = round(pca_result@resultData\$ncomp\$scores, digits=6) ### scores for each pixel + k_value = c($segm_cond.kmeans_k) + r_value = c($segm_cond.kmeans_r) - ## pixel names and coordinates - ## to remove potential sample names and z dimension, split at comma and take only x and y - x_coords = unlist(lapply(strsplit(rownames(pcascores), ","), `[[`, 1)) - y_coords = unlist(lapply(strsplit(rownames(pcascores), ","), `[[`, 2)) - x_coordinates = gsub("x = ","",x_coords) - y_coordinates = gsub(" y = ","",y_coords) - - pixel_names = paste0("xy_", x_coordinates, "_", y_coordinates) - pcascores2 = data.frame(pixel_names, x_coordinates, y_coordinates, pcascores) - colnames(pcascores2) = c("pixel names", "x", "y", colnames(pcascores)) - write.table(pcaloadings2, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") - write.table(pcascores2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") + for (k in k_value) { + for (r in r_value) { + print(image(skm, key=TRUE, model = list(k = k, r = r), + main = paste("K-means clustering (r =", r, ", k =", k, ")"), + strip = FALSE, col = colourvector, layout = c(1, 1), ylim = c(maximumy+2, minimumy-2))) - ## optional output as .RData - #if $output_rdata: - ## save as (.RData) - save(pca, file="$segmentation_rdata") - - #end if - - #elif str( $segm_cond.segmentationtool ) == 'kmeans': - print('kmeans') - ##k-means - - skm = spatialKMeans(msidata, r=c($segm_cond.kmeans_r), k=c($segm_cond.kmeans_k), method="$segm_cond.kmeans_method") - - ## remove msidata to clean up RAM space - rm(msidata) - gc() - - print(image(skm, key=TRUE, main="K-means clustering", strip=FALSE, col= colourvector, layout=c(1,1), ylim=c(maximumy+2, minimumy-2))) - print(plot(skm, main="K-means plot", col= colourvector, strip=FALSE, layout=c(1,1))) + print(plot(skm, model = list(k = k, r = r), key = TRUE, + main = paste("K-means plot (r =", r, ", k =", k, ")"), + strip = FALSE, col = colourvector, layout = c(1, 1))) + } + } skm_clusters = data.frame(matrix(NA, nrow = pixelcount, ncol = 0)) for (iteration in 1:length(skm@resultData)){ - skm_cluster = ((skm@resultData)[[iteration]]\$cluster) - skm_clusters = cbind(skm_clusters, skm_cluster) } + skm_cluster = ((skm@resultData)[[iteration]]\$cluster) + skm_clusters = cbind(skm_clusters, skm_cluster) } - ## pixel names and coordinates - ## to remove potential sample names and z dimension, split at comma and take only x and y - x_coords = unlist(lapply(strsplit(rownames(skm_clusters), ","), `[[`, 1)) - y_coords = unlist(lapply(strsplit(rownames(skm_clusters), ","), `[[`, 2)) - x_coordinates = gsub("x = ","",x_coords) - y_coordinates = gsub(" y = ","",y_coords) - pixel_names = paste0("xy_", x_coordinates, "_", y_coordinates) - skm_clusters2 = data.frame(pixel_names, x_coordinates, y_coordinates, skm_clusters) - colnames(skm_clusters2) = c("pixel names", "x", "y",names(skm@resultData)) + skm.coordinates = coord(skm) + x_coords = skm.coordinates@listData[["x"]] + y_coords = skm.coordinates@listData[["y"]] + pixel_names = paste0("xy_", x_coords, "_", y_coords) + + skm_clusters2 = data.frame(pixel_names, x_coords, y_coords, skm_clusters) + r_values = skm@modelData@listData[["r"]] + k_values = skm@modelData@listData[["k"]] + new_names = paste0("r=", r_values, ", k=", k_values) + colnames(skm_clusters2) = c("pixel names", "x", "y", new_names) skm_toplabels = topFeatures(skm, n=$segm_cond.kmeans_toplabels) @@ -165,36 +135,136 @@ print('centroids') ##centroids - ssc = spatialShrunkenCentroids(msidata, r=c($segm_cond.centroids_r), k=c($segm_cond.centroids_k), s=c($segm_cond.centroids_s), method="$segm_cond.centroids_method") + ssc = spatialShrunkenCentroids(msidata, r=c($segm_cond.centroids_r), k=c($segm_cond.centroids_k), s=c($segm_cond.centroids_s), method="gaussian") + ## remove msidata to clean up RAM space rm(msidata) gc() - print(image(ssc, key=TRUE, main="Spatial shrunken centroids", strip = TRUE, col= colourvector,layout=c(1,1), ylim=c(maximumy+2, minimumy-2))) - print(plot(ssc, main="Spatial shrunken centroids plot", col= colourvector, strip = TRUE,layout=c(1,1))) - print(plot(ssc, mode = "tstatistics",key = TRUE, layout = c(1,1), main="t-statistics", col=colourvector)) + + ## new plots and summary table + + summary_df = summary(ssc) + summary_df = as.data.frame(summary_df@listData) + colnames(summary_df) = c("r", "initial_k", "s", "k", "features_per_k") + + opar <- par() + par(opar) + plot(0,type='n',axes=FALSE,ann=FALSE) + title(main="\n Summary for the different parameters\n", adj=0.5) + ## 20 rows fits in one page: + if (nrow(summary_df)<=20){ + grid.table(summary_df, rows= NULL) + }else{ + grid.table(summary_df[1:20,], rows= NULL) + mincount = 21 + maxcount = 40 + for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){ + plot(0,type='n',axes=FALSE,ann=FALSE) + if (maxcount <= nrow(summary_df)){ + grid.table(summary_df[mincount:maxcount,], rows= NULL) + mincount = mincount+20 + maxcount = maxcount+20 + }else{### stop last page with last sample otherwise NA in table + grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} + } + } + + ## plot + summary_df\$r <- factor(summary_df\$r) + summary_df\$initial_k <- factor(summary_df\$initial_k) + + cluster_plot = ggplot(summary_df, aes(x = s, y = k, color = initial_k)) + + geom_point(size = 3) + ### Add points + geom_line() + + theme_bw() + + facet_wrap(~ paste("r =", r)) + + labs(title = "Number of segments", y = "predicted number of k", x = "shrinkage parameter (s)") + + print(cluster_plot) + + s_value = c($segm_cond.centroids_s) + k_value = c($segm_cond.centroids_k) + r_value = c($segm_cond.centroids_r) + + to_remove = subset(summary_df, features_per_k == 0) + s_to_remove = unique(c(to_remove\$s)) + s_value = s_value[!s_value %in% s_to_remove] - plot(summary(ssc), main = "Number of segments") + for (s in s_value) { + for (k in k_value) { + for (r in r_value) { + print(image(ssc, model = list(s = s, k = k, r = r), key = TRUE, values = "class", + main = paste("Spatial shrunken centroids (s =", s, ", k =", k, ", r =", r, ")"), + strip = FALSE, col = colourvector, layout = c(1, 1), ylim = c(maximumy+2, minimumy-2))) + + print(image(ssc, model = list(s = s, k = k, r = r), key = TRUE, values = "probability", + main = paste("Class Probability (s =", s, ", k =", k, ", r =", r, ")"), + strip = FALSE, col = colourvector, layout = c(1, 1), ylim = c(maximumy+2, minimumy-2))) + + print(plot(ssc, model = list(s = s, k = k, r = r), key = TRUE, + main = paste("Spatial shrunken centroids features (s =", s, ", k =", k, ", r =", r, ")"), + col = colourvector, strip = TRUE, layout = c(1, 1))) + + print(plot(ssc, model = list(s = s, k = k, r = r), values = "statistic", key = TRUE, + layout = c(1, 1), + main = paste("t-statistics (s =", s, ", k =", k, ", r =", r, ")"), + col = colourvector)) + } + } + } + + + new_s_value = s_to_remove + + for (s in new_s_value) { + for (k in k_value) { + for (r in r_value) { + print(image(ssc, model = list(s = s, k = k, r = r), key = TRUE, values = "class", + main = paste("Spatial shrunken centroids (s =", s, ", k =", k, ", r =", r, ")"), + strip = FALSE, col = colourvector, layout = c(1, 1), ylim = c(maximumy+2, minimumy-2))) + + print(image(ssc, model = list(s = s, k = k, r = r), key = TRUE, values = "probability", + main = paste("Class Probability (s =", s, ", k =", k, ", r =", r, ")"), + strip = FALSE, col = colourvector, layout = c(1, 1), ylim = c(maximumy+2, minimumy-2))) + + print(plot(ssc, model = list(s = s, k = k, r = r), key = TRUE, + main = paste("Spatial shrunken centroids features (s =", s, ", k =", k, ", r =", r, ")"), + col = colourvector, strip = TRUE, layout = c(1, 1))) + + plot(0, 0, type = "n", xlab = "", ylab = "", xlim = c(0, 10), ylim = c(0, 10), xaxt = "n", yaxt = "n") + ## Add the text to the plot + text(5, 5, "t-statistics plot can not be drawn.\nS (shrinkage parameter) is too small to result\n in meaningful segmentation.", + cex = 1.5, adj = c(0.5, 0.5)) + + } + } + } ssc_classes = data.frame(matrix(NA, nrow = pixelcount, ncol = 0)) - for (iteration in 1:length(ssc@resultData)){ - ssc_class = ((ssc@resultData)[[iteration]]\$classes) + for (iteration in 1:length(ssc@resultData@listData)){ + ssc_class = ((ssc@resultData@listData)[[iteration]]\$class) ssc_classes = cbind(ssc_classes, ssc_class) } - ## pixel names and coordinates - ## to remove potential sample names and z dimension, split at comma and take only x and y - x_coords = unlist(lapply(strsplit(rownames(ssc_classes), ","), `[[`, 1)) - y_coords = unlist(lapply(strsplit(rownames(ssc_classes), ","), `[[`, 2)) - x_coordinates = gsub("x = ","",x_coords) - y_coordinates = gsub(" y = ","",y_coords) - pixel_names = paste0("xy_", x_coordinates, "_", y_coordinates) - ssc_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, ssc_classes) - colnames(ssc_classes2) = c("pixel names", "x", "y", names(ssc@resultData)) + ## coordinates and topFeatures of results + s_values = ssc@modelData@listData[["s"]] + r_values = ssc@modelData@listData[["r"]] + k_values = ssc@modelData@listData[["k"]] + new_names = paste0("r=", r_values, ", s=", s_values, ", k=", k_values) + + ssc.coordinates = coord(ssc) + x_coords = ssc.coordinates@listData[["x"]] + y_coords = ssc.coordinates@listData[["y"]] + pixel_names = paste0("xy_", x_coords, "_", y_coords) + + ssc_classes2 = data.frame(pixel_names, x_coords, y_coords, ssc_classes) + colnames(ssc_classes2) = c("pixel names", "x", "y", new_names) ssc_toplabels = topFeatures(ssc, n=$segm_cond.centroids_toplabels) write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") + ## optional output as .RData #if $output_rdata: @@ -212,7 +282,6 @@ print("svg image") ## reverse y axis for svg output = correct order and nice svg image - svg(file="svg_pixel_output.svg", width=maximumx, height=maximumy) par(mar=c(0,0,0,0)) #if str( $segm_cond.segmentationtool ) == 'pca': @@ -230,6 +299,8 @@ }else{ + plot.new() + text(0.5, 0.5, "Inputfile has no intensities > 0 \n or contains NA values.", cex = 1.5) print("Inputfile has no intensities > 0") dev.off() } @@ -240,20 +311,9 @@ <expand macro="reading_msidata"/> <conditional name="segm_cond"> <param name="segmentationtool" type="select" label="Select the tool for spatial clustering"> - <option value="pca" selected="True">pca</option> <option value="kmeans">k-means</option> <option value="centroids">spatial shrunken centroids</option> </param> - <when value="pca"> - <param name="pca_ncomp" type="integer" value="2" - label="The number of principal components to calculate"/> - <param name="pca_method" type="select" - label="The function used to calculate the singular value decomposition"> - <option value="irlba" selected="True">irlba</option> - <option value="svd">svd</option> - </param> - <param name="pca_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Scaling of data before analysis"/> - </when> <when value="kmeans"> <param name="kmeans_r" type="text" value="2" label="The spatial neighborhood radius of nearby pixels to consider (r)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"> @@ -263,12 +323,7 @@ label="The number of clusters (k)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"> <expand macro="sanitizer_multiple_digits"/> </param> - <param name="kmeans_method" type="select" display="radio" - label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) clustering, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) clustering"> - <option value="gaussian">gaussian</option> - <option value="adaptive" selected="True">adaptive</option> - </param> - <param name="kmeans_toplabels" type="integer" value="500" + <param name="kmeans_toplabels" type="integer" value="500" label="Number of toplabels (m/z) which should be written in tabular output"/> </when> @@ -286,26 +341,42 @@ help="As s increases, fewer m/z features (m/z values) will be used in the spatial segmentation, and only the informative m/z features will be retained. Multiple values are allowed (e.g. 1,2,3 or 2:5)"> <expand macro="sanitizer_multiple_digits"/> </param> - <param name="centroids_method" type="select" display="radio" label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights"> - <option value="gaussian">gaussian</option> - <option value="adaptive" selected="True">adaptive</option> - </param> + <param name="centroids_toplabels" type="integer" value="500" label="Number of toplabels (m/z) which should be written in tabular output"/> </when> </conditional> <param name="svg_pixelimage" type="boolean" label="Export first segmentation image as svg"/> - <repeat name="colours" title="Colours for the plots" min="1" max="50"> - <param name="feature_color" type="color" label="Colours" value="#ff00ff" help="Numbers of colours should be the same as number of components"> - <sanitizer> - <valid initial="string.letters,string.digits"> - <add value="#" /> - </valid> - </sanitizer> + + <conditional name="colour_conditional"> + <param name="colour_type" type="select" label="Choose a colour scheme"> + <option value="colourpalette" selected="True" >Colour palette</option> + <option value="manual_colour">Manual selection</option> + </param> + <when value="manual_colour"> + <repeat name="colours" title="Colours for the plots" min="1" max="50"> + <param name="annotation_color" type="color" label="Colours" value="#ff00ff" help="Numbers of colours should be the same as number of components"> + <sanitizer> + <valid initial="string.letters,string.digits"> + <add value="#" /> + </valid> + </sanitizer> + </param> + </repeat> + </when> + <when value="colourpalette"> + <param name="palettes" type="select" display="radio" label="Select a colourpalette"> + <option value="hue_pal()" selected="True">hue</option> + <option value="rainbow">rainbow</option> + <option value="heat.colors">heat colors</option> + <option value="terrain.colors">terrain colors</option> + <option value="topo.colors">topo colors</option> + <option value="cm.colors">cm colors</option> </param> - </repeat> - <param name="output_rdata" type="boolean" label="Results as .RData output"/> - <param name="setseed" type="integer" value="1" label="set seed" help="Use same value to reproduce previous results"/> + </when> + </conditional> + <param name="output_rdata" type="boolean" label="Results as .RData output"/> + <param name="setseed" type="integer" value="1" label="set seed" help="Use same value to reproduce previous results"/> </inputs> <outputs> <data format="pdf" name="segmentationimages" from_work_dir="segmentationpdf.pdf" label = "${tool.name} on ${on_string}: results"/> @@ -319,28 +390,7 @@ </data> </outputs> <tests> - <test> - <expand macro="infile_imzml"/> - <param name="segmentationtool" value="pca"/> - <repeat name="colours"> - <param name="feature_color" value="#ff00ff"/> - </repeat> - <repeat name="colours"> - <param name="feature_color" value="#0000FF"/> - </repeat> - <output name="segmentationimages" file="pca_imzml.pdf" compare="sim_size"/> - <output name="mzfeatures"> - <assert_contents> - <has_text text="300.1667" /> - <has_text text="300.25" /> - <has_text text="-4.234458e-04" /> - <has_text text="3.878545e-10" /> - <has_n_columns n="3" /> - </assert_contents> - </output> - <output name="pixeloutput" file="scores_pca.tabular"/> - </test> - <test> + <test expect_num_outputs="4"> <expand macro="infile_imzml"/> <param name="segmentationtool" value="kmeans"/> <param name="kmeans_r" value="1:3"/> @@ -361,7 +411,7 @@ <output name="pixeloutput" file="cluster_skm.tabular"/> <output name="segmentation_rdata" file="cluster_skm.RData" compare="sim_size"/> </test> - <test> + <test expect_num_outputs="3"> <param name="infile" value="preprocessed.RData" ftype="rdata"/> <param name="segmentationtool" value="centroids"/> <param name="centroids_r" value="1,2"/> @@ -380,7 +430,7 @@ <output name="mzfeatures" file="toplabels_ssc.tabular"/> <output name="pixeloutput" file="classes_ssc.tabular"/> </test> - <test> + <test expect_num_outputs="3"> <expand macro="processed_infile_imzml"/> <conditional name="processed_cond"> <param name="processed_file" value="processed"/> @@ -401,16 +451,20 @@ <repeat name="colours"> <param name="feature_color" value="#B0171F"/> </repeat> - <output name="segmentationimages" file="centroids_proc.pdf" compare="sim_size"/> + <output name="segmentationimages" ftype="pdf"> + <assert_contents> + <has_size value="1206464" delta="100"/> + </assert_contents> + </output> <output name="pixeloutput" file="classes_proc.tabular"/> <output name="mzfeatures"> <assert_contents> - <has_text text="100.642" /> - <has_text text="101.816297645089" /> - <has_text text="1.34687866193417" /> - <has_text text="6.43855724908388" /> - <has_n_columns n="9" /> - <has_n_lines n="101" /> + <has_text text="177.926436700994"/> + <has_text text="192.976841249583"/> + <has_text text="0.818218808031712"/> + <has_text text="0.469980133537009"/> + <has_n_columns n="7"/> + <has_n_lines n="101"/> </assert_contents> </output> </test>