diff segmentation_tool.xml @ 0:0c1a9b68f436 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/msi_segmentation commit da5a0723327f7cce689b230ccd69f3edecb1bc6b
author galaxyp
date Sat, 24 Feb 2018 13:51:32 -0500
parents
children d4158c9955ea
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/segmentation_tool.xml	Sat Feb 24 13:51:32 2018 -0500
@@ -0,0 +1,409 @@
+<tool id="mass_spectrometry_imaging_segmentations" name="MSI segmentation" version="1.7.0">
+    <description>tool for spatial clustering</description>
+    <requirements>
+        <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement>
+        <requirement type="package" version="2.2.1">r-gridextra</requirement>
+        <requirement type="package" version="2.23-15">r-kernsmooth</requirement>
+        <requirement type="package" version="0.20-35">r-lattice</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+    <![CDATA[
+
+        ## Stage the input under a fixed name in the working directory so the
+        ## R script can read it without knowing the Galaxy dataset path.
+        #if $infile.ext == 'imzml'
+            cp '${infile.extra_files_path}/imzml' infile.imzML &&
+            cp '${infile.extra_files_path}/ibd' infile.ibd &&
+        #elif $infile.ext == 'analyze75'
+            cp '${infile.extra_files_path}/hdr' infile.hdr &&
+            cp '${infile.extra_files_path}/img' infile.img &&
+            cp '${infile.extra_files_path}/t2m' infile.t2m &&
+        #else
+            ## Quoted so that a dataset path containing spaces or shell
+            ## metacharacters is passed to ln as a single argument.
+            ln -s '$infile' infile.RData &&
+        #end if
+        ## Print the rendered script and its path to the job output, then run it.
+        cat '${MSI_segmentation}' &&
+        echo '${MSI_segmentation}' &&
+        Rscript '${MSI_segmentation}'
+
+    ]]>
+    </command>
+    <configfiles>
+        <configfile name="MSI_segmentation"><![CDATA[
+
+
+################################# load libraries and read file #########################
+
+
+library(Cardinal)
+library(gridExtra)
+library(KernSmooth)
+library(lattice)
+
+## Read the MSI dataset from the file name staged by the command section.
+## The Cheetah condition is resolved when the script is rendered, so the final
+## R script contains exactly one of the three read calls below.
+
+#if $infile.ext == 'imzml'
+    msidata <- readMSIData('infile.imzML')
+#elif $infile.ext == 'analyze75'
+    msidata <- readMSIData('infile.hdr')
+#else
+    ## load() restores the objects stored in the file; the rest of the script
+    ## reads `msidata`, so the RData file has to contain an object of that name.
+    load('infile.RData')
+#end if
+
+###################################### file properties in numbers ######################
+
+## Materialise the intensity matrix once and reuse it for every statistic below,
+## instead of reading spectra(msidata)[] again for each value.
+intensitymatrix = spectra(msidata)[]
+
+## Number of features (mz)
+maxfeatures = length(features(msidata))
+## Range mz
+minmz = round(min(mz(msidata)), digits=2)
+maxmz = round(max(mz(msidata)), digits=2)
+## Number of spectra (pixels)
+pixelcount = length(pixels(msidata))
+## Range x coordinates
+minimumx = min(coord(msidata)[,1])
+maximumx = max(coord(msidata)[,1])
+## Range y coordinates
+minimumy = min(coord(msidata)[,2])
+maximumy = max(coord(msidata)[,2])
+## Range of intensities
+minint = round(min(intensitymatrix), digits=2)
+maxint = round(max(intensitymatrix), digits=2)
+medint = round(median(intensitymatrix), digits=2)
+## Number of intensities > 0; counted per spectrum first so the total is
+## accumulated as a double and cannot exceed the integer range
+npeaks = sum(colSums(intensitymatrix > 0))
+## Spectra multiplied with mz (potential number of peaks); as.numeric keeps the
+## product from overflowing integer arithmetic on large datasets
+numpeaks = as.numeric(ncol(intensitymatrix))*nrow(intensitymatrix)
+## Percentage of intensities > 0
+percpeaks = round(npeaks/numpeaks*100, digits=2)
+## Number of empty TICs
+TICs = colSums(intensitymatrix)
+NumemptyTIC = sum(TICs == 0)
+
+## The full matrix is not needed beyond this point
+rm(intensitymatrix)
+
+
+## Processing information stored with the dataset
+processinginfo = processingData(msidata)
+## centroided is reported as stored (TRUE or FALSE)
+centroidedinfo = processinginfo@centroided
+
+## Return a processing slot unchanged, or the string FALSE when the slot is empty
+slot_or_false = function(slotvalue) {
+  if (length(slotvalue) == 0) {
+    return('FALSE')
+  }
+  slotvalue
+}
+
+## One entry per preprocessing step shown in the property table
+normalizationinfo = slot_or_false(processinginfo@normalization)
+smoothinginfo = slot_or_false(processinginfo@smoothing)
+baselinereductioninfo = slot_or_false(processinginfo@baselineReduction)
+peakpickinginfo = slot_or_false(processinginfo@peakPicking)
+
+#############################################################################
+
+## Row labels of the summary table. The order must match `values` below,
+## because the two vectors are paired by position.
+properties = c("Number of mz features",
+               "Range of mz values [Da]",
+               "Number of pixels", 
+               "Range of x coordinates", 
+               "Range of y coordinates",
+               "Range of intensities", 
+               "Median of intensities",
+               "Intensities > 0",
+               "Number of zero TICs",
+               "Preprocessing", 
+               "Normalization", 
+               "Smoothing",
+               "Baseline reduction",
+               "Peak picking",
+               "Centroided")
+
+## Table values as character strings; paste0() converts each number to text.
+## The tenth entry is a blank cell next to the "Preprocessing" label.
+values = c(paste0(maxfeatures), 
+           paste0(minmz, " - ", maxmz), 
+           paste0(pixelcount), 
+           paste0(minimumx, " - ", maximumx),  
+           paste0(minimumy, " - ", maximumy), 
+           paste0(minint, " - ", maxint), 
+           paste0(medint),
+           paste0(percpeaks, " %"), 
+           paste0(NumemptyTIC), 
+           paste0(" "),
+           paste0(normalizationinfo),
+           paste0(smoothinginfo),
+           paste0(baselinereductioninfo),
+           paste0(peakpickinginfo),
+           paste0(centroidedinfo))
+
+## The data frame columns take their names from the two variables above
+property_df = data.frame(properties, values)
+
+
+######################################## PDF #############################################
+##########################################################################################
+##########################################################################################
+
+
+## Open the report file; the plots below are drawn into this PDF device
+pdf("segmentationpdf.pdf", fonts = "Times", pointsize = 12)
+## Empty plot that serves as canvas for the title and the property table
+plot(0,type='n',axes=FALSE,ann=FALSE)
+
+## The dataset name is user-editable text that is pasted into an R string literal.
+## Double quotes and backslashes are replaced so the name can neither end the
+## string early nor escape the closing quote.
+#set $title_name = str($infile.display_name).replace(chr(92), '_').replace('"', '_')
+title(main=paste0("Spatial segmentation for file: \n\n", "$title_name"))
+
+
+############################# I) numbers ####################################
+#############################################################################
+## Draw the property table onto the title page
+grid.table(property_df, rows= NULL)
+
+## Segmentation is only attempted when at least one intensity is above zero
+if (npeaks > 0)
+{
+
+
+######################## II) segmentation tools #############################
+#############################################################################
+        ## Colours chosen in the tool form, rendered as one quoted R string per repeat entry
+        #set $color_string = ','.join(['"%s"' % $color.feature_color for $color in $colours])
+        colourvector = c($color_string)
+
+
+        #if str( $segm_cond.segmentationtool ) == 'pca':
+            print('pca')
+            ##pca
+
+            ## Component labels PC1..PCn, built in one vectorised call
+            component_vector = paste0("PC", seq_len($segm_cond.pca_ncomp))
+            ## NOTE(review): column, superpose and layout look like plotting arguments, and
+            ## ncomp is not assigned anywhere in this script - confirm that PCA() accepts or ignores them
+            pca <- PCA(msidata, ncomp=$segm_cond.pca_ncomp, column = component_vector, superpose = FALSE, 
+            method = "$segm_cond.pca_method", scale = $segm_cond.pca_scale, layout = c(ncomp, 1))
+
+            print(image(pca, main="PCA image", lattice=TRUE, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), contrast.enhance = "$segm_cond.pca_imagecontrast", smooth.image = "$segm_cond.pca_imagesmoothing", col=colourvector))
+            print(plot(pca, main="PCA plot", lattice=TRUE, col= colourvector, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9))))
+
+
+            pcaloadings = (pca@resultData\$ncomp\$loadings) ### loading for each mz value
+            pcascores = (pca@resultData\$ncomp\$scores) ### scores for each pixel
+
+            ## Loadings go to the mz feature output, scores to the pixel output
+            write.table(pcaloadings, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+            write.table(pcascores, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+
+        #elif str( $segm_cond.segmentationtool ) == 'kmeans':
+            print('kmeans')
+            ##k-means
+
+            skm <- spatialKMeans(msidata, r=$segm_cond.kmeans_r, k=$segm_cond.kmeans_k, method="$segm_cond.kmeans_method")
+            print(image(skm, key=TRUE, main="K-means clustering", lattice=TRUE, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), contrast.enhance = "$segm_cond.kmeans_imagecontrast", col= colourvector, smooth.image = "$segm_cond.kmeans_imagesmoothing"))
+            print(plot(skm, main="K-means plot", lattice=TRUE, col= colourvector, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9))))
+
+
+            skm_clusters = (skm@resultData\$r\$cluster)
+            skm_toplabels = topLabels(skm, n=500)
+
+            ## Top labels go to the mz feature output, cluster assignments to the pixel output
+            write.table(skm_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+            write.table(skm_clusters, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+
+
+        #elif str( $segm_cond.segmentationtool ) == 'centroids':
+            print('centroids')
+            ##centroids
+
+            ssc <- spatialShrunkenCentroids(msidata, r=$segm_cond.centroids_r, k=$segm_cond.centroids_k, s=$segm_cond.centroids_s, method="$segm_cond.centroids_method")
+            print(image(ssc, key=TRUE, main="Spatial shrunken centroids", lattice=TRUE, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)), contrast.enhance = "$segm_cond.centroids_imagecontrast", col= colourvector, smooth.image = "$segm_cond.centroids_imagesmoothing"))
+            print(plot(ssc, main="Spatial shrunken centroids plot", lattice=TRUE, col= colourvector, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9))))
+
+            ssc_classes = (ssc@resultData\$r\$classes)
+            ssc_toplabels =  topLabels(ssc, n=500)
+
+            ## Top labels go to the mz feature output, class assignments to the pixel output
+            write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+            write.table(ssc_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+
+
+        #end if
+
+}else{
+    print("Inputfile has no intensities > 0")
+}
+
+## Close the PDF device in either case so the report file is finalised
+dev.off()
+
+    ]]></configfile>
+    </configfiles>
+    <inputs>
+        <param name="infile" type="data" format="imzml, rdata, analyze75"
+               label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
+                help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
+            <conditional name="segm_cond">
+                <param name="segmentationtool" type="select" label="Select the tool for spatial clustering.">
+                    <option value="pca" selected="True">pca</option>
+                    <option value="kmeans">k-means</option>
+                    <option value="centroids">shrunken centroids</option>
+                </param>
+                <when value="pca">
+                    <!-- Parameters passed to PCA() and to the PCA image plot -->
+                    <param name="pca_ncomp" type="integer" value="2"
+                           label="The number of principal components to calculate."/>
+                    <param name="pca_method" type="select"
+                           label="The function used to calculate the singular value decomposition.">
+                        <option value="irlba" selected="True">irlba</option>
+                        <option value="svd">svd</option>
+                    </param>
+                    <param name="pca_scale" type="select" display="radio" optional="False"
+                           label="Should the data be scaled first?">
+                        <option value="TRUE">yes</option>
+                        <option value="FALSE" selected="True">no</option>
+                    </param>
+                    <param name="pca_imagecontrast" type="select" label="Select a contrast enhancement function." help="The 'histogram' equalization method flattens the distribution of intensities. The hotspot 'suppression' method uses thresholding to reduce the intensities of hotspots">
+                        <option value="none" selected="True">none</option>
+                        <option value="suppression">suppression</option>
+                        <option value="histogram">histogram</option>
+                    </param>
+                    <param name="pca_imagesmoothing" type="select" label="Select an image smoothing function." help="The 'gaussian' smoothing method smooths images with a simple gaussian kernel. The 'adaptive' method uses bilateral filtering to preserve edges.">
+                        <option value="none" selected="True">none</option>
+                        <option value="gaussian">gaussian</option>
+                        <option value="adaptive">adaptive</option>
+                    </param>
+                </when>
+
+                <when value="kmeans">
+                    <!-- Parameters passed to spatialKMeans() and to the k-means image plot -->
+                    <param name="kmeans_r" type="text" value="2"
+                           label="The spatial neighborhood radius of nearby pixels to consider (r)."/>
+                    <param name="kmeans_k" type="text" value="3"
+                           label="The number of clusters (k)."/>
+                    <param name="kmeans_method" type="select" display="radio"
+                           label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) clustering, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) clustering.">
+                        <option value="gaussian">gaussian</option>
+                        <option value="adaptive" selected="True">adaptive</option>
+                    </param>
+                    <param name="kmeans_imagecontrast" type="select" label="Select a contrast enhancement function." help="The 'histogram' equalization method flattens the distribution of intensities. The hotspot 'suppression' method uses thresholding to reduce the intensities of hotspots">
+                        <option value="none" selected="True">none</option>
+                        <option value="suppression">suppression</option>
+                        <option value="histogram">histogram</option>
+                    </param>
+                    <param name="kmeans_imagesmoothing" type="select" label="Select an image smoothing function." help="The 'gaussian' smoothing method smooths images with a simple gaussian kernel. The 'adaptive' method uses bilateral filtering to preserve edges.">
+                        <option value="none" selected="True">none</option>
+                        <option value="gaussian">gaussian</option>
+                        <option value="adaptive">adaptive</option>
+                    </param>
+                </when>
+
+                <when value="centroids">
+                    <!-- Parameters passed to spatialShrunkenCentroids() and to its image plot -->
+                    <param name="centroids_r" type="text" value="2"
+                           label="The spatial neighborhood radius of nearby pixels to consider (r)."/>
+                    <param name="centroids_k" type="text" value="5"
+                           label="The initial number of clusters (k)."/>
+                    <param name="centroids_s" type="integer" value="2"
+                           label="The sparsity thresholding parameter by which to shrink the t-statistics (s)."
+                           help="As s increases, fewer mass features (m/z values) will be used in the spatial segmentation, and only the informative mass features will be retained."/>
+                    <param name="centroids_method" type="select" display="radio"
+                           label="The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights.">
+                        <option value="gaussian" selected="True">gaussian</option>
+                        <option value="adaptive">adaptive</option>
+                    </param>
+                    <param name="centroids_imagecontrast" type="select" label="Select a contrast enhancement function." help="The 'histogram' equalization method flattens the distribution of intensities. The hotspot 'suppression' method uses thresholding to reduce the intensities of hotspots">
+                        <option value="none" selected="True">none</option>
+                        <option value="suppression">suppression</option>
+                        <option value="histogram">histogram</option>
+                    </param>
+                    <param name="centroids_imagesmoothing" type="select" label="Select an image smoothing function." help="The 'gaussian' smoothing method smooths images with a simple gaussian kernel. The 'adaptive' method uses bilateral filtering to preserve edges.">
+                        <option value="none" selected="True">none</option>
+                        <option value="gaussian">gaussian</option>
+                        <option value="adaptive">adaptive</option>
+                    </param>
+                </when>
+            </conditional>
+            <!-- Each entry becomes one element of the colour vector used for the plots -->
+            <repeat name="colours" title="Colours for the plots" min="1" max="50">
+                <param name="feature_color" type="color" label="Colours" value="#ff00ff" help="Number of colours should be the same as number of components">
+                  <!-- Only letters, digits and the hash sign are kept in the colour value -->
+                  <sanitizer>
+                    <valid initial="string.letters,string.digits">
+                      <add value="#" />
+                    </valid>
+                  </sanitizer>
+                </param>
+            </repeat>
+    </inputs>
+    <outputs>
+        <!-- PDF report that the R script writes into the working directory -->
+        <data name="segmentationimages" format="pdf"
+              from_work_dir="segmentationpdf.pdf"
+              label="${tool.name} on $infile.display_name"/>
+        <!-- Tables that the R script writes with write.table -->
+        <data name="mzfeatures" format="tabular"
+              label="mzfeatures ${tool.name} on $infile.display_name"/>
+        <data name="pixeloutput" format="tabular"
+              label="pixels ${tool.name} on $infile.display_name"/>
+    </outputs>
+    <tests>
+        <test>
+            <!-- PCA on a composite imzML upload with two plot colours; all three outputs are compared by size only (sim_size) -->
+            <param name="infile" value="" ftype="imzml">
+                <composite_data value="Example_Continuous.imzML"/>
+                <composite_data value="Example_Continuous.ibd"/>
+            </param>
+            <param name="segmentationtool" value="pca"/>
+            <repeat name="colours">
+                <param name="feature_color" value="#ff00ff"/>
+            </repeat>
+            <repeat name="colours">
+                <param name="feature_color" value="#0000FF"/>
+            </repeat>
+            <output name="segmentationimages" file="pca_imzml.pdf" compare="sim_size" delta="20000"/>
+            <output name="mzfeatures" file="pcaloadings_results1.txt" compare="sim_size"/>
+            <output name="pixeloutput" file="pcascores_results1.txt" compare="sim_size"/>
+        </test>
+        <test>
+            <!-- k-means on a composite Analyze7.5 upload with three plot colours; outputs are compared by size only (sim_size) -->
+            <!-- NOTE(review): the reference PDF is named kmeans_imzml.pdf although the input is analyze75 - confirm it is the intended file -->
+            <param name="infile" value="" ftype="analyze75">
+                <composite_data value="Analyze75.hdr" />
+                <composite_data value="Analyze75.img" />
+                <composite_data value="Analyze75.t2m" />
+            </param>
+            <param name="segmentationtool" value="kmeans"/>
+            <repeat name="colours">
+                <param name="feature_color" value="#ff00ff"/>
+            </repeat>
+            <repeat name="colours">
+                <param name="feature_color" value="#0000FF"/>
+            </repeat>
+            <repeat name="colours">
+                <param name="feature_color" value="#00C957"/>
+            </repeat>
+            <output name="segmentationimages" file="kmeans_imzml.pdf" compare="sim_size" delta="20000"/>
+            <output name="mzfeatures" file="toplabels_results1.txt" compare="sim_size"/>
+            <output name="pixeloutput" file="cluster_results1.txt" compare="sim_size"/>
+        </test>
+        <test>
+            <!-- Spatial shrunken centroids on an RData input with five plot colours; outputs are compared by size only (sim_size) -->
+            <!-- NOTE(review): mzfeatures is compared against toplabels_results1.txt, the same reference file name the k-means test uses - confirm this is intended -->
+            <param name="infile" value="preprocessing_results1.RData" ftype="rdata"/>
+            <param name="segmentationtool" value="centroids"/>
+            <repeat name="colours">
+                <param name="feature_color" value="#0000FF"/>
+            </repeat>
+            <repeat name="colours">
+                <param name="feature_color" value="#00C957"/>
+            </repeat>
+            <repeat name="colours">
+                <param name="feature_color" value="#B0171F"/>
+            </repeat>
+            <repeat name="colours">
+                <param name="feature_color" value="#FFD700"/>
+            </repeat>
+            <repeat name="colours">
+                <param name="feature_color" value="#848484"/>
+            </repeat>
+            <output name="segmentationimages" file="centroids_imzml.pdf" compare="sim_size" delta="20000"/>
+            <output name="mzfeatures" file="toplabels_results1.txt" compare="sim_size"/>
+            <output name="pixeloutput" file="classes_results1.txt" compare="sim_size"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+
+Spatially aware segmentation of mass-spectrometry imaging data by unsupervised clustering algorithms. Underlying structures can be identified with the following tools: pca, k-means clustering and spatial shrunken centroids. The spatialShrunkenCentroids method allows the number of segments to decrease according to the data. This allows automatic selection of the number
+of clusters.
+
+Input data: 3 types of input data can be used:
+
+- imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <http://ms-imaging.org/wp/introduction/>`_
+- Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
+- Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
+
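+Output: a pdf with plots from the selected segmentation tool, plus two tabular files: one with the m/z feature results (PCA loadings or top labels) and one with the pixel results (PCA scores, cluster or class assignments).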
+        ]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv146</citation>
+    </citations>
+</tool>