Mercurial > repos > galaxyp > cardinal_segmentations
view segmentation.xml @ 17:91f0f5922011 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 91e77c139cb3b7c6d67727dc39140dd79355fa0c
author | galaxyp |
---|---|
date | Thu, 04 Jul 2024 13:36:52 +0000 |
parents | 050bcc806da2 |
children |
line wrap: on
line source
<tool id="cardinal_segmentations" name="MSI segmentation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
    <!--
        Galaxy wrapper around two Cardinal spatial segmentation functions
        (spatialKMeans and spatialShrunkenCentroids). The R script in the
        configfile below is a Cheetah template: lines starting with '##' are
        template comments and '#if/#elif/#end if' select the code that ends up
        in the generated script.
    -->
    <description>mass spectrometry imaging spatial clustering</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code">
    <![CDATA[

        @INPUT_LINKING@
        cat '${MSI_segmentation}' &&
        Rscript '${MSI_segmentation}'

    ]]>
    </command>
    <configfiles>
        <configfile name="MSI_segmentation"><![CDATA[

################################# load libraries and read file #################

library(Cardinal)
library(gridExtra)
library(ggplot2)
library(scales)

@READING_MSIDATA@

msidata = as(msidata, "MSImagingExperiment")

## remove duplicated coordinates
msidata <- msidata[,!duplicated(coord(msidata))]

@DATA_PROPERTIES_INRAM@

######################################## PDF ###################################
################################################################################
################################################################################

## title page of the report
pdf("segmentationpdf.pdf", fonts = "Times", pointsize = 12)
plot(0,type='n',axes=FALSE,ann=FALSE)

title(main=paste0("Spatial segmentation for file: \n\n", "$infile.display_name"))

############################# I) numbers ####################################
#############################################################################

grid.table(property_df, rows= NULL)

## segmentation is only attempted when there are peaks and no NA intensities
if (npeaks > 0 && NAcount==0) {

    ######################## II) segmentation tools #############################
    #############################################################################

    ## the largest requested k decides how many palette colours are generated
    #if str( $segm_cond.segmentationtool ) == 'kmeans':
        number_colors = max(c($segm_cond.kmeans_k))
    #elif str( $segm_cond.segmentationtool ) == 'centroids':
        number_colors = max(c($segm_cond.centroids_k))
    #end if

    #if str($colour_conditional.colour_type) == "manual_colour"
        ## manual colours: quoted, comma separated list built from the repeat
        #set $color_string = ','.join(['"%s"' % $color.annotation_color for $color in $colour_conditional.colours])
        colourvector = c($color_string)

    #elif str($colour_conditional.colour_type) == "colourpalette"
        ## palette: the selected option value is used as a palette function
        number_levels = (number_colors)
        colourvector = noquote($colour_conditional.palettes)(number_levels)

    #end if

    ## set seed to make analysis reproducible
    set.seed($setseed)

    #if str( $segm_cond.segmentationtool ) == 'kmeans':
        print('kmeans')
        ##k-means

        skm = spatialKMeans(msidata, r=c($segm_cond.kmeans_r), k=c($segm_cond.kmeans_k), method="gaussian")

        ## remove msidata to clean up RAM space
        rm(msidata)
        gc()

        k_value = c($segm_cond.kmeans_k)
        r_value = c($segm_cond.kmeans_r)

        ## one image page and one plot page per (k, r) combination
        for (k in k_value) {
            for (r in r_value) {
                print(image(skm, key=TRUE, model = list(k = k, r = r),
                            main = paste("K-means clustering (r =", r, ", k =", k, ")"),
                            strip = FALSE, col = colourvector, layout = c(1, 1),
                            ylim = c(maximumy+2, minimumy-2)))

                print(plot(skm, model = list(k = k, r = r), key = TRUE,
                           main = paste("K-means plot (r =", r, ", k =", k, ")"),
                           strip = FALSE, col = colourvector, layout = c(1, 1)))
            }
        }

        ## collect the cluster assignment of every model as one column each
        skm_clusters = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
        for (iteration in seq_len(length(skm@resultData))){
            skm_cluster = ((skm@resultData)[[iteration]]\$cluster)
            skm_clusters = cbind(skm_clusters, skm_cluster)
        }

        ## pixel table: name, coordinates and one cluster column per model
        skm.coordinates = coord(skm)
        x_coords = skm.coordinates@listData[["x"]]
        y_coords = skm.coordinates@listData[["y"]]
        pixel_names = paste0("xy_", x_coords, "_", y_coords)
        skm_clusters2 = data.frame(pixel_names, x_coords, y_coords, skm_clusters)
        r_values = skm@modelData@listData[["r"]]
        k_values = skm@modelData@listData[["k"]]
        new_names = paste0("r=", r_values, ", k=", k_values)
        colnames(skm_clusters2) = c("pixel names", "x", "y", new_names)

        skm_toplabels = topFeatures(skm, n=$segm_cond.kmeans_toplabels)

        write.table(skm_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
        write.table(skm_clusters2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")

        ## optional output as .RData
        #if $output_rdata:
            ## save as (.RData)
            save(skm, file="$segmentation_rdata")
        #end if

    #elif str( $segm_cond.segmentationtool ) == 'centroids':
        print('centroids')
        ##centroids

        ssc = spatialShrunkenCentroids(msidata, r=c($segm_cond.centroids_r), k=c($segm_cond.centroids_k), s=c($segm_cond.centroids_s), method="gaussian")

        ## remove msidata to clean up RAM space
        rm(msidata)
        gc()

        ## new plots and summary table
        summary_df = summary(ssc)
        summary_df = as.data.frame(summary_df@listData)
        colnames(summary_df) = c("r", "initial_k", "s", "k", "features_per_k")

        ## NOTE(review): par() is read and immediately re-applied, which looks
        ## like a no-op; kept unchanged, confirm before removing
        opar <- par()
        par(opar)
        plot(0,type='n',axes=FALSE,ann=FALSE)
        title(main="\n Summary for the different parameters\n", adj=0.5)
        ## 20 rows fits in one page:
        if (nrow(summary_df)<=20){
            grid.table(summary_df, rows= NULL)
        }else{
            grid.table(summary_df[1:20,], rows= NULL)
            mincount = 21
            maxcount = 40
            ## this branch is only reached with more than 20 rows, so the
            ## upper bound of the page counter is at least 1
            for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){
                plot(0,type='n',axes=FALSE,ann=FALSE)
                if (maxcount <= nrow(summary_df)){
                    grid.table(summary_df[mincount:maxcount,], rows= NULL)
                    mincount = mincount+20
                    maxcount = maxcount+20
                }else{
                    ## stop last page with last sample otherwise NA in table
                    grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)
                }
            }
        }

        ## plot
        summary_df\$r <- factor(summary_df\$r)
        summary_df\$initial_k <- factor(summary_df\$initial_k)

        cluster_plot = ggplot(summary_df, aes(x = s, y = k, color = initial_k)) +
            geom_point(size = 3) +
            geom_line() +
            theme_bw() +
            facet_wrap(~ paste("r =", r)) +
            labs(title = "Number of segments", y = "predicted number of k", x = "shrinkage parameter (s)")

        print(cluster_plot)

        s_value = c($segm_cond.centroids_s)
        k_value = c($segm_cond.centroids_k)
        r_value = c($segm_cond.centroids_r)

        ## s values with a summary row reporting zero features per k are
        ## plotted separately below, without the t-statistics plot
        to_remove = subset(summary_df, features_per_k == 0)
        s_to_remove = unique(c(to_remove\$s))
        s_value = s_value[!s_value %in% s_to_remove]

        ## full set of four pages for the remaining s values
        for (s in s_value) {
            for (k in k_value) {
                for (r in r_value) {
                    print(image(ssc, model = list(s = s, k = k, r = r), key = TRUE, values = "class",
                                main = paste("Spatial shrunken centroids (s =", s, ", k =", k, ", r =", r, ")"),
                                strip = FALSE, col = colourvector, layout = c(1, 1),
                                ylim = c(maximumy+2, minimumy-2)))

                    print(image(ssc, model = list(s = s, k = k, r = r), key = TRUE, values = "probability",
                                main = paste("Class Probability (s =", s, ", k =", k, ", r =", r, ")"),
                                strip = FALSE, col = colourvector, layout = c(1, 1),
                                ylim = c(maximumy+2, minimumy-2)))

                    print(plot(ssc, model = list(s = s, k = k, r = r), key = TRUE,
                               main = paste("Spatial shrunken centroids features (s =", s, ", k =", k, ", r =", r, ")"),
                               col = colourvector, strip = TRUE, layout = c(1, 1)))

                    print(plot(ssc, model = list(s = s, k = k, r = r), values = "statistic", key = TRUE,
                               layout = c(1, 1),
                               main = paste("t-statistics (s =", s, ", k =", k, ", r =", r, ")"),
                               col = colourvector))
                }
            }
        }

        ## same pages for the excluded s values, with a text page instead of
        ## the t-statistics plot
        new_s_value = s_to_remove

        for (s in new_s_value) {
            for (k in k_value) {
                for (r in r_value) {
                    print(image(ssc, model = list(s = s, k = k, r = r), key = TRUE, values = "class",
                                main = paste("Spatial shrunken centroids (s =", s, ", k =", k, ", r =", r, ")"),
                                strip = FALSE, col = colourvector, layout = c(1, 1),
                                ylim = c(maximumy+2, minimumy-2)))

                    print(image(ssc, model = list(s = s, k = k, r = r), key = TRUE, values = "probability",
                                main = paste("Class Probability (s =", s, ", k =", k, ", r =", r, ")"),
                                strip = FALSE, col = colourvector, layout = c(1, 1),
                                ylim = c(maximumy+2, minimumy-2)))

                    print(plot(ssc, model = list(s = s, k = k, r = r), key = TRUE,
                               main = paste("Spatial shrunken centroids features (s =", s, ", k =", k, ", r =", r, ")"),
                               col = colourvector, strip = TRUE, layout = c(1, 1)))

                    plot(0, 0, type = "n", xlab = "", ylab = "", xlim = c(0, 10), ylim = c(0, 10), xaxt = "n", yaxt = "n")
                    ## Add the text to the plot
                    text(5, 5, "t-statistics plot can not be drawn.\nS (shrinkage parameter) is too small to result\n in meaningful segmentation.",
                         cex = 1.5, adj = c(0.5, 0.5))
                }
            }
        }

        ## collect the class assignment of every model as one column each
        ssc_classes = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
        for (iteration in seq_len(length(ssc@resultData@listData))){
            ssc_class = ((ssc@resultData@listData)[[iteration]]\$class)
            ssc_classes = cbind(ssc_classes, ssc_class)
        }

        ## coordinates and topFeatures of results
        s_values = ssc@modelData@listData[["s"]]
        r_values = ssc@modelData@listData[["r"]]
        k_values = ssc@modelData@listData[["k"]]
        new_names = paste0("r=", r_values, ", s=", s_values, ", k=", k_values)

        ssc.coordinates = coord(ssc)
        x_coords = ssc.coordinates@listData[["x"]]
        y_coords = ssc.coordinates@listData[["y"]]
        pixel_names = paste0("xy_", x_coords, "_", y_coords)
        ssc_classes2 = data.frame(pixel_names, x_coords, y_coords, ssc_classes)
        colnames(ssc_classes2) = c("pixel names", "x", "y", new_names)

        ssc_toplabels = topFeatures(ssc, n=$segm_cond.centroids_toplabels)

        write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
        write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")

        ## optional output as .RData
        #if $output_rdata:
            ## save as (.RData)
            save(ssc, file="$segmentation_rdata")
        #end if

    #end if

    ## close the pdf report
    dev.off()

    ## optional svg output with original coordinates
    #if $svg_pixelimage:
        print("svg image")
        ## reverse y axis for svg output = correct order and nice svg image
        svg(file="svg_pixel_output.svg", width=maximumx, height=maximumy)
        par(mar=c(0,0,0,0))
        #if str( $segm_cond.segmentationtool ) == 'kmeans':
            coord(skm)\$y <- max(coord(skm)\$y) - coord(skm)\$y + 1
            image(skm, key=FALSE, strip=FALSE, col= colourvector)
        #elif str( $segm_cond.segmentationtool ) == 'centroids':
            coord(ssc)\$y <- max(coord(ssc)\$y) - coord(ssc)\$y + 1
            image(ssc, key=FALSE, strip = FALSE, col= colourvector)
        #end if
        dev.off()
    #end if

}else{
    ## no usable intensities: write an explanatory page instead of results
    plot.new()
    text(0.5, 0.5, "Inputfile has no intensities > 0 \n or contains NA values.", cex = 1.5)
    print("Inputfile has no intensities > 0")
    dev.off()
}

    ]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="reading_msidata"/>
        <conditional name="segm_cond">
            <param name="segmentationtool" type="select" label="Select the tool for spatial clustering">
                <option value="kmeans">k-means</option>
                <option value="centroids">spatial shrunken centroids</option>
            </param>
            <when value="kmeans">
                <param name="kmeans_r" type="text" value="2" label="The spatial neighborhood radius of nearby pixels to consider (r)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
                    <expand macro="sanitizer_multiple_digits"/>
                </param>
                <param name="kmeans_k" type="text" value="3" label="The number of clusters (k)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
                    <expand macro="sanitizer_multiple_digits"/>
                </param>
                <param name="kmeans_toplabels" type="integer" value="500" label="Number of toplabels (m/z) which should be written in tabular output"/>
            </when>
            <when value="centroids">
                <param name="centroids_r" type="text" value="2" label="The spatial neighborhood radius of nearby pixels to consider (r)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
                    <expand macro="sanitizer_multiple_digits"/>
                </param>
                <param name="centroids_k" type="text" value="5" label="The initial number of clusters (k)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
                    <expand macro="sanitizer_multiple_digits"/>
                </param>
                <param name="centroids_s" type="text" value="2" label="The sparsity thresholding parameter by which to shrink the t-statistics (s)" help="As s increases, fewer m/z features (m/z values) will be used in the spatial segmentation, and only the informative m/z features will be retained. Multiple values are allowed (e.g. 1,2,3 or 2:5)">
                    <expand macro="sanitizer_multiple_digits"/>
                </param>
                <param name="centroids_toplabels" type="integer" value="500" label="Number of toplabels (m/z) which should be written in tabular output"/>
            </when>
        </conditional>
        <param name="svg_pixelimage" type="boolean" label="Export first segmentation image as svg"/>
        <conditional name="colour_conditional">
            <param name="colour_type" type="select" label="Choose a colour scheme">
                <option value="colourpalette" selected="True">Colour palette</option>
                <option value="manual_colour">Manual selection</option>
            </param>
            <when value="manual_colour">
                <repeat name="colours" title="Colours for the plots" min="1" max="50">
                    <param name="annotation_color" type="color" label="Colours" value="#ff00ff" help="The number of colours should match the (largest) number of clusters (k)">
                        <sanitizer>
                            <valid initial="string.letters,string.digits">
                                <add value="#" />
                            </valid>
                        </sanitizer>
                    </param>
                </repeat>
            </when>
            <when value="colourpalette">
                <param name="palettes" type="select" display="radio" label="Select a colourpalette">
                    <option value="hue_pal()" selected="True">hue</option>
                    <option value="rainbow">rainbow</option>
                    <option value="heat.colors">heat colors</option>
                    <option value="terrain.colors">terrain colors</option>
                    <option value="topo.colors">topo colors</option>
                    <option value="cm.colors">cm colors</option>
                </param>
            </when>
        </conditional>
        <param name="output_rdata" type="boolean" label="Results as .RData output"/>
        <param name="setseed" type="integer" value="1" label="set seed" help="Use same value to reproduce previous results"/>
    </inputs>
    <outputs>
        <data format="pdf" name="segmentationimages" from_work_dir="segmentationpdf.pdf" label="${tool.name} on ${on_string}: results"/>
        <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features"/>
        <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels"/>
        <data format="rdata" name="segmentation_rdata" label="${tool.name} on ${on_string}: results.RData">
            <filter>output_rdata</filter>
        </data>
        <data format="svg" name="svg_output" from_work_dir="svg_pixel_output.svg" label="${tool.name} on ${on_string}: image.svg">
            <filter>svg_pixelimage</filter>
        </data>
    </outputs>
    <tests>
        <!--
            NOTE(review): the "colours" repeat declares its parameter as
            annotation_color and only exists when colour_type is
            manual_colour. The feature_color entries in the tests below
            presumably have no effect (the default colour palette is used).
            They are kept unchanged so the expected outputs stay valid;
            confirm and then remove or rename them.
        -->
        <test expect_num_outputs="4">
            <expand macro="infile_imzml"/>
            <param name="segmentationtool" value="kmeans"/>
            <param name="kmeans_r" value="1:3"/>
            <param name="kmeans_k" value="2,3"/>
            <param name="kmeans_toplabels" value="20"/>
            <repeat name="colours">
                <param name="feature_color" value="#ff00ff"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#0000FF"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#00C957"/>
            </repeat>
            <param name="output_rdata" value="True"/>
            <output name="segmentationimages" file="kmeans_analyze.pdf" compare="sim_size"/>
            <output name="mzfeatures" file="toplabels_skm.tabular"/>
            <output name="pixeloutput" file="cluster_skm.tabular"/>
            <output name="segmentation_rdata" file="cluster_skm.RData" compare="sim_size"/>
        </test>
        <test expect_num_outputs="3">
            <param name="infile" value="preprocessed.RData" ftype="rdata"/>
            <param name="segmentationtool" value="centroids"/>
            <param name="centroids_r" value="1,2"/>
            <param name="centroids_k" value="3"/>
            <param name="centroids_toplabels" value="50"/>
            <repeat name="colours">
                <param name="feature_color" value="#0000FF"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#00C957"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#B0171F"/>
            </repeat>
            <output name="segmentationimages" file="centroids_rdata.pdf" compare="sim_size"/>
            <output name="mzfeatures" file="toplabels_ssc.tabular"/>
            <output name="pixeloutput" file="classes_ssc.tabular"/>
        </test>
        <test expect_num_outputs="3">
            <expand macro="processed_infile_imzml"/>
            <conditional name="processed_cond">
                <param name="processed_file" value="processed"/>
                <param name="accuracy" value="200"/>
                <param name="units" value="ppm"/>
            </conditional>
            <param name="segmentationtool" value="centroids"/>
            <param name="centroids_r" value="1"/>
            <param name="centroids_k" value="2,3"/>
            <param name="centroids_s" value="0,3"/>
            <param name="centroids_toplabels" value="100"/>
            <repeat name="colours">
                <param name="feature_color" value="#0000FF"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#00C957"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#B0171F"/>
            </repeat>
            <output name="segmentationimages" ftype="pdf">
                <assert_contents>
                    <has_size value="1206464" delta="100"/>
                </assert_contents>
            </output>
            <output name="pixeloutput" file="classes_proc.tabular"/>
            <output name="mzfeatures">
                <assert_contents>
                    <has_text text="177.926436700994"/>
                    <has_text text="192.976841249583"/>
                    <has_text text="0.818218808031712"/>
                    <has_text text="0.469980133537009"/>
                    <has_n_columns n="7"/>
                    <has_n_lines n="101"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
    <![CDATA[

@CARDINAL_DESCRIPTION@

-----

This tool provides two different Cardinal functions for unsupervised clustering/spatial segmentation of mass spectrometry imaging data.

@MSIDATA_INPUT_DESCRIPTION@

- NA intensities are not allowed
- duplicated coordinates will be removed

**Options**

- k-means: spatially-aware k-means clustering (adopted from `Alexandrov and Kobarg <https://doi.org/10.1093/bioinformatics/btr246>`_)
- spatial shrunken centroids: Allows the number of segments to decrease according to the data. This allows selection of the number of clusters (more details in `Bemis et al. <https://doi.org/10.1074/mcp.O115.053918>`_)

**Output**

- PDF with the heatmaps and plots for the segmentation
- Tabular files with information on m/z and pixels: toplabels/clusters (k-means), toplabels/classes (spatial shrunken centroids)
- Optional .RData file which contains the segmentation results and can be used for further exploration in R using the Cardinal package
- Optional: svg file with the first segmentation image

    ]]>
    </help>
    <expand macro="citations"/>
</tool>