view segmentation_tool.xml @ 2:f66c5789deac draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/msi_segmentation commit ed7d3e6f1a09c78c8f71cc1bdc1a20249767f646
author galaxyp
date Sun, 11 Mar 2018 10:39:01 -0400
parents d4158c9955ea
children 830c6df59603
line wrap: on
line source

<tool id="mass_spectrometry_imaging_segmentations" name="MSI segmentation" version="1.7.0.2">
    <description>tool for spatial clustering</description>
    <requirements>
        <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement>
        <requirement type="package" version="2.2.1">r-gridextra</requirement>
        <requirement type="package" version="2.23-15">r-kernsmooth</requirement>
        <requirement type="package" version="0.20-35">r-lattice</requirement>
    </requirements>
    <command detect_errors="exit_code">
    <![CDATA[

        #if $infile.ext == 'imzml'
            cp '${infile.extra_files_path}/imzml' infile.imzML &&
            cp '${infile.extra_files_path}/ibd' infile.ibd &&
        #elif $infile.ext == 'analyze75'
            cp '${infile.extra_files_path}/hdr' infile.hdr &&
            cp '${infile.extra_files_path}/img' infile.img &&
            cp '${infile.extra_files_path}/t2m' infile.t2m &&
        #else
            ln -s $infile infile.RData &&
        #end if
        cat '${MSI_segmentation}' &&
        echo ${MSI_segmentation} &&
        Rscript '${MSI_segmentation}'

    ]]>
    </command>
    <configfiles>
        <configfile name="MSI_segmentation"><![CDATA[


################################# load libraries and read file #########################


library(Cardinal)
library(gridExtra)
library(KernSmooth)
library(lattice)

## Read MALDI Imaging dataset

#if $infile.ext == 'imzml'
    msidata = readMSIData('infile.imzML')
#elif $infile.ext == 'analyze75'
    msidata = readMSIData('infile.hdr')
#else
    load('infile.RData')
#end if

###################################### file properties in numbers ######################

## Number of features (mz)
maxfeatures = length(features(msidata))
## Range mz
minmz = round(min(mz(msidata)), digits=2)
maxmz = round(max(mz(msidata)), digits=2)
## Number of spectra (pixels)
pixelcount = length(pixels(msidata))
## Range x coordinates
minimumx = min(coord(msidata)[,1])
maximumx = max(coord(msidata)[,1])
## Range y coordinates
minimumy = min(coord(msidata)[,2])
maximumy = max(coord(msidata)[,2])
## Range of intensities
minint = round(min(spectra(msidata)[]), digits=2)
maxint = round(max(spectra(msidata)[]), digits=2)
medint = round(median(spectra(msidata)[]), digits=2)
## Number of intensities > 0
npeaks= sum(spectra(msidata)[]>0)
## Spectra multiplied with mz (potential number of peaks)
numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
## Percentage of intensities > 0
percpeaks = round(npeaks/numpeaks*100, digits=2)
## Number of empty TICs
TICs = colSums(spectra(msidata)[]) 
NumemptyTIC = sum(TICs == 0)


## Processing informations
processinginfo = processingData(msidata)
centroidedinfo = processinginfo@centroided # TRUE or FALSE

## if TRUE write processinginfo if no write FALSE

## normalization
if (length(processinginfo@normalization) == 0) {
  normalizationinfo='FALSE'
} else {
  normalizationinfo=processinginfo@normalization
}
## smoothing
if (length(processinginfo@smoothing) == 0) {
  smoothinginfo='FALSE'
} else {
  smoothinginfo=processinginfo@smoothing
}
## baseline
if (length(processinginfo@baselineReduction) == 0) {
  baselinereductioninfo='FALSE'
} else {
  baselinereductioninfo=processinginfo@baselineReduction
}
## peak picking
if (length(processinginfo@peakPicking) == 0) {
  peakpickinginfo='FALSE'
} else {
  peakpickinginfo=processinginfo@peakPicking
}

#############################################################################

properties = c("Number of mz features",
               "Range of mz values [Da]",
               "Number of pixels", 
               "Range of x coordinates", 
               "Range of y coordinates",
               "Range of intensities", 
               "Median of intensities",
               "Intensities > 0",
               "Number of zero TICs",
               "Preprocessing", 
               "Normalization", 
               "Smoothing",
               "Baseline reduction",
               "Peak picking",
               "Centroided")

values = c(paste0(maxfeatures), 
           paste0(minmz, " - ", maxmz), 
           paste0(pixelcount), 
           paste0(minimumx, " - ", maximumx),  
           paste0(minimumy, " - ", maximumy), 
           paste0(minint, " - ", maxint), 
           paste0(medint),
           paste0(percpeaks, " %"), 
           paste0(NumemptyTIC), 
           paste0(" "),
           paste0(normalizationinfo),
           paste0(smoothinginfo),
           paste0(baselinereductioninfo),
           paste0(peakpickinginfo),
           paste0(centroidedinfo))

property_df = data.frame(properties, values)


######################################## PDF #############################################
##########################################################################################
##########################################################################################


pdf("segmentationpdf.pdf", fonts = "Times", pointsize = 12)
plot(0,type='n',axes=FALSE,ann=FALSE)

title(main=paste0("Spatial segmentation for file: \n\n", "$infile.display_name"))


############################# I) numbers ####################################
#############################################################################
grid.table(property_df, rows= NULL)

if (npeaks > 0)
{


######################## II) segmentation tools #############################
#############################################################################
        #set $color_string = ','.join(['"%s"' % $color.feature_color for $color in $colours])
        colourvector = c($color_string)


        #if str( $segm_cond.segmentationtool ) == 'pca':
            print('pca')
            ##pca
            
            component_vector = character()
            for (numberofcomponents in 1:$segm_cond.pca_ncomp)
            {component_vector[numberofcomponents]= paste0("PC", numberofcomponents)}
            pca = PCA(msidata, ncomp=$segm_cond.pca_ncomp, column = component_vector, superpose = FALSE, 
            method = "$segm_cond.pca_method", scale = $segm_cond.pca_scale, layout = c(ncomp, 1))

            print(image(pca, main="PCA image", lattice=TRUE, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), col=colourvector, ylim=c(maximumy+2, 0)))
            print(plot(pca, main="PCA plot", lattice=TRUE, col= colourvector, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9))))


            pcaloadings = (pca@resultData\$ncomp\$loadings) ### loading for each mz value
            pcascores = (pca@resultData\$ncomp\$scores) ### scores for each pixel

            write.table(pcaloadings, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
            write.table(pcascores, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")

        #elif str( $segm_cond.segmentationtool ) == 'kmeans':
            print('kmeans')
            ##k-means

            skm = spatialKMeans(msidata, r=c($segm_cond.kmeans_r), k=c($segm_cond.kmeans_k), method="$segm_cond.kmeans_method")
            print(image(skm, key=TRUE, main="K-means clustering", lattice=TRUE, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), col= colourvector, ylim=c(maximumy+2, 0), layout=c(1,1)))

            print(plot(skm, main="K-means plot", lattice=TRUE, col= colourvector, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), layout=c($segm_cond.kmeans_layout)))


            skm_clusters = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
            for (iteration in 1:length(skm@resultData)){
                        skm_cluster = ((skm@resultData)[[iteration]]\$cluster)
            skm_clusters = cbind(skm_clusters, skm_cluster) }
            colnames(skm_clusters) = names((skm@resultData)) 

            skm_toplabels = topLabels(skm, n=$segm_cond.kmeans_toplabels)
    
            write.table(skm_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
            write.table(skm_clusters, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")


        #elif str( $segm_cond.segmentationtool ) == 'centroids':
            print('centroids')
            ##centroids

            ssc = spatialShrunkenCentroids(msidata, r=c($segm_cond.centroids_r), k=c($segm_cond.centroids_k), s=c($segm_cond.centroids_s), method="$segm_cond.centroids_method")
            print(image(ssc, key=TRUE, main="Spatial shrunken centroids", lattice=TRUE, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), col= colourvector,  ylim=c(maximumy+2, 0),layout=c(1,1)))
            print(plot(ssc, main="Spatial shrunken centroids plot", lattice=TRUE, col= colourvector, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)),layout=c($segm_cond.centroids_layout)))

            ssc_classes = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
            for (iteration in 1:length(ssc@resultData)){
            ssc_class = ((ssc@resultData)[[iteration]]\$classes)
            ssc_classes = cbind(ssc_classes, ssc_class) }
            colnames(ssc_classes) = names((ssc@resultData)) 

            ssc_toplabels =  topLabels(ssc, n=$segm_cond.centroids_toplabels)

            write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
            write.table(ssc_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")


        #end if

    dev.off()

}else{
    print("Inputfile has no intensities > 0")
    dev.off()
}

    ]]></configfile>
    </configfiles>
    <inputs>
        <param name="infile" type="data" format="imzml, rdata, analyze75"
               label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
                help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
            <conditional name="segm_cond">
                <param name="segmentationtool" type="select" label="Select the tool for spatial clustering.">
                    <option value="pca" selected="True">pca</option>
                    <option value="kmeans">k-means</option>
                    <option value="centroids">shrunken centroids</option>
                </param>
                <when value="pca">
                    <param name="pca_ncomp" type="integer" value="2"
                           label="The number of principal components to calculate."/>
                    <param name="pca_method" type="select" 
                           label="The function used to calculate the singular value decomposition.">
                        <option value="irlba" selected="True">irlba</option>
                        <option value="svd">svd</option>
                    </param>
                    <param name="pca_scale" type="select" display="radio" optional="False"
                           label="Shoud the data be scaled first?">
                        <option value="TRUE">yes</option>
                        <option value="FALSE" selected="True">no</option>
                </param>
                </when> 

                <when value="kmeans">
                    <param name="kmeans_r" type="text" value="2"
                           label="The spatial neighborhood radius of nearby pixels to consider (r)." help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
                    <param name="kmeans_k" type="text" value="3"
                           label="The number of clusters (k)." help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
                    <param name="kmeans_method" type="select" display="radio"
                           label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) clustering, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) clustering.">
                        <option value="gaussian">gaussian</option>
                        <option value="adaptive" selected="True">adaptive</option>
                </param>
                <param name="kmeans_toplabels" type="integer" value="500"
                       label="Number of toplabels (masses) which should be written in tabular output"/>
                <param name="kmeans_layout" type="text" value="1,1"
                       label="Number of rows and columns to plot pictures in pdf output" help="e.g. 1,1 means 1 plot per page; 2,3 means 2 rows with 3 plots each = 6 plots per page"/>
                 </when>

                <when value="centroids">
                    <param name="centroids_r" type="text" value="2"
                           label="The spatial neighborhood radius of nearby pixels to consider (r)." help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
                    <param name="centroids_k" type="text" value="5"
                           label="The initial number of clusters (k)." help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/>
                    <param name="centroids_s" type="text" value="2"
                           label="The sparsity thresholding parameter by which to shrink the t-statistics (s)."
                           help="As s increases, fewer mass features (m/z values) will be used in the spatial segmentation, and only the informative mass features will be retained. Multiple values are allowed (e.g. 1,2,3 or 2:5)."/>
                    <param name="centroids_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights.">
                        <option value="gaussian" selected="True">gaussian</option>
                        <option value="adaptive">adaptive</option>
                </param>
                <param name="centroids_toplabels" type="integer" value="500"
                       label="Number of toplabels (masses) which should be written in tabular output"/>
                <param name="centroids_layout" type="text" value="1,1"
                       label="Number of rows and columns to plot pictures in pdf output" help="e.g. 1,1 means 1 plot per page; 2,3 means 2 rows with 3 plots each = 6 plots per page"/>
                </when>
            </conditional>
            <repeat name="colours" title="Colours for the plots" min="1" max="50">
                <param name="feature_color" type="color" label="Colours" value="#ff00ff" help="Numbers of columns should be the same as number of components">
                  <sanitizer>
                    <valid initial="string.letters,string.digits">
                      <add value="#" />
                    </valid>
                  </sanitizer>
                </param>
            </repeat>
    </inputs>
    <outputs>
        <data format="pdf" name="segmentationimages" from_work_dir="segmentationpdf.pdf" label = "${tool.name} on $infile.display_name"/>
        <data format="tabular" name="mzfeatures" label="mzfeatures ${tool.name} on $infile.display_name"/>
        <data format="tabular" name="pixeloutput" label="pixels ${tool.name} on $infile.display_name"/>
    </outputs>
    <tests>
        <test>
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML"/>
                <composite_data value="Example_Continuous.ibd"/>
            </param>
            <param name="segmentationtool" value="pca"/>
            <repeat name="colours">
                <param name="feature_color" value="#ff00ff"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#0000FF"/>
            </repeat>
            <output name="segmentationimages" file="pca_imzml.pdf" compare="sim_size" delta="20000"/>
            <output name="mzfeatures" file="loadings_pca.tabular" compare="sim_size"/>
            <output name="pixeloutput" file="scores_pca.tabular" compare="sim_size"/>
        </test>
        <test>
            <param name="infile" value="" ftype="analyze75">
                <composite_data value="Analyze75.hdr" />
                <composite_data value="Analyze75.img" />
                <composite_data value="Analyze75.t2m" />
            </param>
            <param name="segmentationtool" value="kmeans"/>
            <param name="kmeans_r" value="1:3"/>
            <param name="kmeans_k" value="2,3"/>
            <repeat name="colours">
                <param name="feature_color" value="#ff00ff"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#0000FF"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#00C957"/>
            </repeat>
            <output name="segmentationimages" file="kmeans_imzml.pdf" compare="sim_size" delta="20000"/>
            <output name="mzfeatures" file="toplabels_skm.tabular" compare="sim_size"/>
            <output name="pixeloutput" file="cluster_skm.tabular" compare="sim_size"/>
        </test>
        <test>
            <param name="infile" value="preprocessing_results1.RData" ftype="rdata"/>
            <param name="segmentationtool" value="centroids"/>
            <param name="centroids_r" ftype="text" value="1,2"/>
            <param name="centroids_k" ftype="text" value="5"/>
            <param name="centroids_toplabels" ftype="integer" value="100"/>
            <repeat name="colours">
                <param name="feature_color" value="#0000FF"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#00C957"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#B0171F"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#FFD700"/>
            </repeat>
            <repeat name="colours">
                <param name="feature_color" value="#848484"/>
            </repeat>
            <output name="segmentationimages" file="centroids_imzml.pdf" compare="sim_size" delta="20000"/>
            <output name="mzfeatures" file="toplabels_ssc.tabular" compare="sim_size"/>
            <output name="pixeloutput" file="classes_ssc.tabular" compare="sim_size"/>
        </test>
    </tests>
    <help>
        <![CDATA[

Spatially aware segmentation of mass-spectrometry imaging data by unsupervised clustering algorithms. Underlying structures can be identified with the following tools: pca, k-means clustering and spatial shrunken centroids. The spatialShrunkenCentroids method allows the number of segments to decrease according to the data. This allows automatic selection of the number
of clusters.

Input data: 3 types of input data can be used:

- imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <http://ms-imaging.org/wp/introduction/>`_
- Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
- Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)

The output of this tool contains a pdf with plots from the segmentation tools. 
        ]]>
    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btv146</citation>
    </citations>
</tool>