view segmentation.xml @ 17:91f0f5922011 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 91e77c139cb3b7c6d67727dc39140dd79355fa0c
author galaxyp
date Thu, 04 Jul 2024 13:36:52 +0000
parents 050bcc806da2
children
line wrap: on
line source

<tool id="cardinal_segmentations" name="MSI segmentation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
    <description>mass spectrometry imaging spatial clustering</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!-- Job command line: expands the INPUT_LINKING macro token (defined in macros.xml,
         not visible in this file; presumably it makes the input files available in the
         working directory), prints the generated R script to stdout with cat and then
         executes it with Rscript. Failures are detected through the exit code. -->
    <command detect_errors="exit_code">
    <![CDATA[

        @INPUT_LINKING@
        cat '${MSI_segmentation}' &&
        Rscript '${MSI_segmentation}'

    ]]>
    </command>
    <configfiles>
        <configfile name="MSI_segmentation"><![CDATA[

## -----------------------------------------------------------------------------
## Load libraries and read the input file
## -----------------------------------------------------------------------------

library(Cardinal)
library(gridExtra)
library(ggplot2)
library(scales)

@READING_MSIDATA@

msidata <- as(msidata, "MSImagingExperiment")

## drop every pixel whose coordinates were already seen (duplicated coordinates)
msidata <- msidata[, !duplicated(coord(msidata))]

@DATA_PROPERTIES_INRAM@

## -----------------------------------------------------------------------------
## PDF report: title page
## -----------------------------------------------------------------------------

pdf("segmentationpdf.pdf", fonts = "Times", pointsize = 12)
plot(0, type = "n", axes = FALSE, ann = FALSE)
title(main = paste0("Spatial segmentation for file: \n\n", "$infile.display_name"))

## I) numbers: table with the properties of the input data on the title page
grid.table(property_df, rows = NULL)


if (npeaks > 0 && NAcount==0)
{

######################## II) segmentation tools #############################
#############################################################################

        ## number of colours that are needed = largest requested number of clusters (k)
        #if str( $segm_cond.segmentationtool ) == 'kmeans':
            number_colors = max(c($segm_cond.kmeans_k))
        #elif str( $segm_cond.segmentationtool ) == 'centroids':
            number_colors = max(c($segm_cond.centroids_k))
        #end if

        #if str($colour_conditional.colour_type) == "manual_colour"
            ## manually selected colours are rendered as a quoted R character vector
            #set $color_string = ','.join(['"%s"' % $color.annotation_color for $color in $colour_conditional.colours])
            colourvector = c($color_string)
        #elif str($colour_conditional.colour_type) == "colourpalette"
            ## the selected option value is an R expression that yields a palette function;
            ## it is assigned first so that the template placeholder is not directly
            ## followed by an opening parenthesis
            palette_function = $colour_conditional.palettes
            colourvector = palette_function(number_colors)
        #end if

        ## set seed to make analysis reproducible
        set.seed($setseed)

        #if str( $segm_cond.segmentationtool ) == 'kmeans':
            print('kmeans')
            ## spatially-aware k-means with the user-supplied r and k values
            skm = spatialKMeans(msidata, r=c($segm_cond.kmeans_r), k=c($segm_cond.kmeans_k), method="gaussian")

            ## remove msidata to clean up RAM space
            rm(msidata)
            gc()

            k_value = c($segm_cond.kmeans_k)
            r_value = c($segm_cond.kmeans_r)

            ## for every (k, r) model: segmentation image followed by the feature plot
            for (k in k_value) {
                for (r in r_value) {
                    print(image(skm, key=TRUE, model = list(k = k, r = r),
                                main = paste("K-means clustering (r =", r, ", k =", k, ")"),
                                strip = FALSE, col = colourvector, layout = c(1, 1), ylim = c(maximumy+2, minimumy-2)))

                    print(plot(skm, model = list(k = k, r = r), key = TRUE,
                                main = paste("K-means plot (r =", r, ", k =", k, ")"),
                                strip = FALSE, col = colourvector, layout = c(1, 1)))
                }
            }

            ## collect the cluster assignment of every fitted model as one column each;
            ## seq_len() yields an empty sequence for zero results, unlike 1:length()
            skm_clusters = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
            for (iteration in seq_len(length(skm@resultData))) {
                skm_cluster = ((skm@resultData)[[iteration]]\$cluster)
                skm_clusters = cbind(skm_clusters, skm_cluster)
            }

            ## pixel table: name and coordinates followed by one cluster column per model
            skm.coordinates = coord(skm)
            x_coords = skm.coordinates@listData[["x"]]
            y_coords = skm.coordinates@listData[["y"]]
            pixel_names = paste0("xy_", x_coords, "_", y_coords)

            skm_clusters2 = data.frame(pixel_names, x_coords, y_coords, skm_clusters)
            r_values = skm@modelData@listData[["r"]]
            k_values = skm@modelData@listData[["k"]]
            new_names = paste0("r=", r_values, ", k=", k_values)
            colnames(skm_clusters2) = c("pixel names", "x", "y", new_names)

            skm_toplabels = topFeatures(skm, n=$segm_cond.kmeans_toplabels)

            write.table(skm_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
            write.table(skm_clusters2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")

            ## optional output as .RData
            #if $output_rdata:

            ## save as (.RData)
            save(skm, file="$segmentation_rdata")

            #end if

        #elif str( $segm_cond.segmentationtool ) == 'centroids':
            print('centroids')
            ## spatial shrunken centroids with the user-supplied r, k and s values

            ssc = spatialShrunkenCentroids(msidata, r=c($segm_cond.centroids_r), k=c($segm_cond.centroids_k), s=c($segm_cond.centroids_s), method="gaussian")

            ## remove msidata to clean up RAM space
            rm(msidata)
            gc()

            ## new plots and summary table

            ## convert the model summary into a plain data.frame and give its five
            ## columns fixed names that are used further below
            summary_df = summary(ssc)
            summary_df = as.data.frame(summary_df@listData)
            colnames(summary_df) = c("r", "initial_k", "s", "k", "features_per_k")

            ## NOTE(review): par() is read and immediately written back unchanged;
            ## this looks like a no-op apart from its effect on the device state - confirm
            opar <- par()
            par(opar)
            ## empty plot that only carries the title of the summary page
            plot(0,type='n',axes=FALSE,ann=FALSE)
            title(main="\n Summary for the different parameters\n", adj=0.5)
            ## 20 rows fits in one page:
            if (nrow(summary_df)<=20){
                grid.table(summary_df, rows= NULL)
            }else{
                ## first 20 rows go onto the title page, the remaining rows are
                ## written in chunks of 20, each chunk after a fresh empty plot
                grid.table(summary_df[1:20,], rows= NULL)
                mincount = 21
                maxcount = 40
                for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){
                    plot(0,type='n',axes=FALSE,ann=FALSE)
                    if (maxcount <= nrow(summary_df)){
                        grid.table(summary_df[mincount:maxcount,], rows= NULL)
                        mincount = mincount+20
                        maxcount = maxcount+20
                    }else{### stop last page with last sample otherwise NA in table
                        grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)}
                }
            }

            ## plot
            ## r and initial_k become factors so that they act as discrete
            ## facet and colour groups in the plot below
            summary_df\$r <- factor(summary_df\$r)
            summary_df\$initial_k <- factor(summary_df\$initial_k)

            ## resulting k (y) against s (x), coloured by initial_k, one facet per r
            cluster_plot = ggplot(summary_df, aes(x = s, y = k, color = initial_k)) +
                        geom_point(size = 3) +   ### Add points
                        geom_line() +
                        theme_bw() +
                        facet_wrap(~ paste("r =", r)) +
                        labs(title =  "Number of segments", y = "predicted number of k", x = "shrinkage parameter (s)")

            print(cluster_plot)

            s_value = c($segm_cond.centroids_s)
            k_value = c($segm_cond.centroids_k)
            r_value = c($segm_cond.centroids_r)

            ## s values for which the summary reports 0 features_per_k in at least one
            ## row: for these the t-statistics plot is replaced by an explanatory page
            to_remove = subset(summary_df, features_per_k == 0)
            s_to_remove = unique(c(to_remove\$s))
            s_value = s_value[!s_value %in% s_to_remove]

            ## first pass: s values with a t-statistics plot, second pass: s values
            ## without one. Both passes share the first three plots per model, so
            ## only the fourth page differs.
            for (tstatistics_available in c(TRUE, FALSE)) {
              s_current = if (tstatistics_available) s_value else s_to_remove

              for (s in s_current) {
                for (k in k_value) {
                  for (r in r_value) {
                    print(image(ssc, model = list(s = s, k = k, r = r), key = TRUE, values = "class",
                                main = paste("Spatial shrunken centroids (s =", s, ", k =", k, ", r =", r, ")"),
                                strip = FALSE, col = colourvector, layout = c(1, 1), ylim = c(maximumy+2, minimumy-2)))

                    print(image(ssc, model = list(s = s, k = k, r = r), key = TRUE, values = "probability",
                                main = paste("Class Probability (s =", s, ", k =", k, ", r =", r, ")"),
                                strip = FALSE, col = colourvector, layout = c(1, 1), ylim = c(maximumy+2, minimumy-2)))

                    print(plot(ssc, model = list(s = s, k = k, r = r), key = TRUE,
                               main = paste("Spatial shrunken centroids features (s =", s, ", k =", k, ", r =", r, ")"),
                               col = colourvector, strip = TRUE, layout = c(1, 1)))

                    if (tstatistics_available) {
                        print(plot(ssc, model = list(s = s, k = k, r = r), values = "statistic", key = TRUE,
                                   layout = c(1, 1),
                                   main = paste("t-statistics (s =", s, ", k =", k, ", r =", r, ")"),
                                   col = colourvector))
                    } else {
                        ## empty frame that only carries the explanatory text
                        plot(0, 0, type = "n", xlab = "", ylab = "", xlim = c(0, 10), ylim = c(0, 10), xaxt = "n", yaxt = "n")
                        text(5, 5, "t-statistics plot can not be drawn.\nS (shrinkage parameter) is too small to result\n in meaningful segmentation.",
                                 cex = 1.5, adj = c(0.5, 0.5))
                    }
                  }
                }
              }
            }

            ## collect the class assignment of every fitted model as one column each;
            ## seq_along() yields an empty sequence for zero results, unlike 1:length()
            ssc_classes = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
            for (iteration in seq_along(ssc@resultData@listData)) {
                ssc_class = ((ssc@resultData@listData)[[iteration]]\$class)
                ssc_classes = cbind(ssc_classes, ssc_class)
            }

            ## coordinates and topFeatures of results
            ## column headers are built from the r, s and k values stored in modelData
            s_values = ssc@modelData@listData[["s"]]
            r_values = ssc@modelData@listData[["r"]]
            k_values = ssc@modelData@listData[["k"]]
            new_names = paste0("r=", r_values, ", s=", s_values, ", k=", k_values)

            ssc.coordinates = coord(ssc)
            x_coords = ssc.coordinates@listData[["x"]]
            y_coords = ssc.coordinates@listData[["y"]]
            pixel_names = paste0("xy_", x_coords, "_", y_coords)

            ## pixel table: name and coordinates followed by one class column per model
            ssc_classes2 = data.frame(pixel_names, x_coords, y_coords, ssc_classes)
            colnames(ssc_classes2) = c("pixel names", "x", "y", new_names)

            ssc_toplabels =  topFeatures(ssc, n=$segm_cond.centroids_toplabels)

            write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
            write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")


            ## optional output as .RData
            #if $output_rdata:

            ## save as (.RData)
            save(ssc, file="$segmentation_rdata")

            #end if

        #end if

    dev.off()

        ## optional svg output with original coordinates
        #if $svg_pixelimage:
            print("svg image")
            ## reverse y axis for svg output = correct order and nice svg image

            svg(file="svg_pixel_output.svg", width=maximumx, height=maximumy)
            par(mar=c(0,0,0,0))
            ## only the two selectable segmentation tools are handled here
            #if str( $segm_cond.segmentationtool ) == 'kmeans':
                coord(skm)\$y <- max(coord(skm)\$y) - coord(skm)\$y + 1
                image(skm, key=FALSE, strip=FALSE, col= colourvector)
            #elif str( $segm_cond.segmentationtool ) == 'centroids':
                coord(ssc)\$y <- max(coord(ssc)\$y) - coord(ssc)\$y + 1
                image(ssc, key=FALSE, strip = FALSE, col= colourvector)
            #end if
            dev.off()
        #end if


}else{
    ## reached when npeaks is not > 0 or NAcount is not 0: no segmentation is run,
    ## the pdf only gets a page with the message below and is then closed
    plot.new()
    text(0.5, 0.5, "Inputfile has no intensities > 0  \n or contains NA values.", cex = 1.5)
    print("Inputfile has no intensities > 0")
    dev.off()
}

    ]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="reading_msidata"/>

        <!-- segmentation method together with its method-specific parameters -->
        <conditional name="segm_cond">
            <param name="segmentationtool" type="select" label="Select the tool for spatial clustering">
                <option value="kmeans">k-means</option>
                <option value="centroids">spatial shrunken centroids</option>
            </param>
            <when value="kmeans">
                <param name="kmeans_r" type="text" value="2" label="The spatial neighborhood radius of nearby pixels to consider (r)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
                    <expand macro="sanitizer_multiple_digits"/>
                </param>
                <param name="kmeans_k" type="text" value="3" label="The number of clusters (k)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
                    <expand macro="sanitizer_multiple_digits"/>
                </param>
                <param name="kmeans_toplabels" type="integer" value="500" label="Number of toplabels (m/z) which should be written in tabular output"/>
            </when>
            <when value="centroids">
                <param name="centroids_r" type="text" value="2" label="The spatial neighborhood radius of nearby pixels to consider (r)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
                    <expand macro="sanitizer_multiple_digits"/>
                </param>
                <param name="centroids_k" type="text" value="5" label="The initial number of clusters (k)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
                    <expand macro="sanitizer_multiple_digits"/>
                </param>
                <param name="centroids_s" type="text" value="2" label="The sparsity thresholding parameter by which to shrink the t-statistics (s)" help="As s increases, fewer m/z features (m/z values) will be used in the spatial segmentation, and only the informative m/z features will be retained. Multiple values are allowed (e.g. 1,2,3 or 2:5)">
                    <expand macro="sanitizer_multiple_digits"/>
                </param>
                <param name="centroids_toplabels" type="integer" value="500" label="Number of toplabels (m/z) which should be written in tabular output"/>
            </when>
        </conditional>

        <param name="svg_pixelimage" type="boolean" label="Export first segmentation image as svg"/>

        <!-- colours of the segments: either a palette function or manually picked colours -->
        <conditional name="colour_conditional">
            <param name="colour_type" type="select" label="Choose a colour scheme">
                <option value="colourpalette" selected="True">Colour palette</option>
                <option value="manual_colour">Manual selection</option>
            </param>
            <when value="manual_colour">
                <repeat name="colours" title="Colours for the plots" min="1" max="50">
                    <param name="annotation_color" type="color" label="Colours" value="#ff00ff" help="Numbers of colours should be the same as number of components">
                        <sanitizer>
                            <valid initial="string.letters,string.digits">
                                <add value="#"/>
                            </valid>
                        </sanitizer>
                    </param>
                </repeat>
            </when>
            <when value="colourpalette">
                <param name="palettes" type="select" display="radio" label="Select a colourpalette">
                    <option value="hue_pal()" selected="True">hue</option>
                    <option value="rainbow">rainbow</option>
                    <option value="heat.colors">heat colors</option>
                    <option value="terrain.colors">terrain colors</option>
                    <option value="topo.colors">topo colors</option>
                    <option value="cm.colors">cm colors</option>
                </param>
            </when>
        </conditional>

        <param name="output_rdata" type="boolean" label="Results as .RData output"/>
        <param name="setseed" type="integer" value="1" label="set seed" help="Use same value to reproduce previous results"/>
    </inputs>
    <outputs>
        <!-- pdf report and svg image are collected from the job working directory;
             the two tabular files are written directly to their dataset paths -->
        <data name="segmentationimages" format="pdf" from_work_dir="segmentationpdf.pdf" label="${tool.name} on ${on_string}: results"/>
        <data name="mzfeatures" format="tabular" label="${tool.name} on ${on_string}: features"/>
        <data name="pixeloutput" format="tabular" label="${tool.name} on ${on_string}: pixels"/>
        <!-- optional outputs, only created when the matching boolean input is set -->
        <data name="segmentation_rdata" format="rdata" label="${tool.name} on ${on_string}: results.RData">
            <filter>output_rdata</filter>
        </data>
        <data name="svg_output" format="svg" from_work_dir="svg_pixel_output.svg" label="${tool.name} on ${on_string}: image.svg">
            <filter>svg_pixelimage</filter>
        </data>
    </outputs>
    <tests>
        <!-- All tests run with the default colour scheme (colour palette "hue").
             No "colours" repeat is set here: that repeat only exists when colour_type
             is "manual_colour" and its parameter is named "annotation_color". -->

        <!-- k-means with several r and k values, including the optional RData output -->
        <test expect_num_outputs="4">
            <expand macro="infile_imzml"/>
            <param name="segmentationtool" value="kmeans"/>
            <param name="kmeans_r" value="1:3"/>
            <param name="kmeans_k" value="2,3"/>
            <param name="kmeans_toplabels" value="20"/>
            <param name="output_rdata" value="True"/>
            <output name="segmentationimages" file="kmeans_analyze.pdf" compare="sim_size"/>
            <output name="mzfeatures" file="toplabels_skm.tabular"/>
            <output name="pixeloutput" file="cluster_skm.tabular"/>
            <output name="segmentation_rdata" file="cluster_skm.RData" compare="sim_size"/>
        </test>

        <!-- spatial shrunken centroids on an RData input -->
        <test expect_num_outputs="3">
            <param name="infile" value="preprocessed.RData" ftype="rdata"/>
            <param name="segmentationtool" value="centroids"/>
            <param name="centroids_r" value="1,2"/>
            <param name="centroids_k" value="3"/>
            <param name="centroids_toplabels" value="50"/>
            <output name="segmentationimages" file="centroids_rdata.pdf" compare="sim_size"/>
            <output name="mzfeatures" file="toplabels_ssc.tabular"/>
            <output name="pixeloutput" file="classes_ssc.tabular"/>
        </test>

        <!-- spatial shrunken centroids on a processed imzML input with several k and s values -->
        <test expect_num_outputs="3">
           <expand macro="processed_infile_imzml"/>
            <conditional name="processed_cond">
                <param name="processed_file" value="processed"/>
                <param name="accuracy" value="200"/>
                <param name="units" value="ppm"/>
            </conditional>
            <param name="segmentationtool" value="centroids"/>
            <param name="centroids_r" value="1"/>
            <param name="centroids_k" value="2,3"/>
            <param name="centroids_s" value="0,3"/>
            <param name="centroids_toplabels" value="100"/>
            <output name="segmentationimages" ftype="pdf">
                <assert_contents>
                    <has_size value="1206464" delta="100"/>
                </assert_contents>
            </output>
            <output name="pixeloutput" file="classes_proc.tabular"/>
            <output name="mzfeatures">
                <assert_contents>
                    <has_text text="177.926436700994"/>
                    <has_text text="192.976841249583"/>
                    <has_text text="0.818218808031712"/>
                    <has_text text="0.469980133537009"/>
                    <has_n_columns n="7"/>
                    <has_n_lines n="101"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
        <![CDATA[

@CARDINAL_DESCRIPTION@

-----

This tool provides two Cardinal functions for unsupervised clustering/spatial segmentation of mass spectrometry imaging data: spatially-aware k-means and spatial shrunken centroids.

@MSIDATA_INPUT_DESCRIPTION@
            - NA intensities are not allowed
            - duplicated coordinates will be removed


**Options**

- PCA: principal component analysis (cannot be selected in this version of the tool)
- k-means: spatially-aware k-means clustering (adopted from `Alexandrov and Kobarg <https://doi.org/10.1093/bioinformatics/btr246>`_)
- spatial shrunken centroids: Allows the number of segments to decrease according to the data. This allows selection of the number of clusters (more details in `Bemis et al. <https://doi.org/10.1074/mcp.O115.053918>`_)

**Output**

- Pdf with the heatmaps and plots for the segmentation
- Two tabular files: one with the top m/z features (toplabels) and one with the pixel coordinates and their assignment - clusters for k-means, classes for spatial shrunken centroids
- Optional .RData file which contains the segmentation results and can be used for further exploration in R using the Cardinal package
- Optional: svg file with the first segmentation image

        ]]>
    </help>
    <expand macro="citations"/>
</tool>