diff segmentation.xml @ 2:034885df9b09 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit f127be2141cf22e269c85282d226eb16fe14a9c1
author galaxyp
date Fri, 15 Feb 2019 10:16:36 -0500
parents 98d48f081ad9
children 09b638ceee45
line wrap: on
line diff
--- a/segmentation.xml	Thu Oct 25 07:25:52 2018 -0400
+++ b/segmentation.xml	Fri Feb 15 10:16:36 2019 -0500
@@ -1,4 +1,4 @@
-<tool id="cardinal_segmentations" name="MSI segmentation" version="@VERSION@.1">
+<tool id="cardinal_segmentations" name="MSI segmentation" version="@VERSION@.2">
     <description>mass spectrometry imaging spatial clustering</description>
     <macros>
         <import>macros.xml</import>
@@ -26,13 +26,18 @@
 library(gridExtra)
 library(lattice)
 
-@READING_MSIDATA@
 
 
-## create full matrix to make processed imzML files compatible with segmentation
-iData(msidata) <- iData(msidata)[] 
+@READING_MSIDATA_INRAM@
+
+## create the full intensity matrix so that processed imzML files work as well:
+iData(msidata) = iData(msidata)[]
 
-@DATA_PROPERTIES@
+## count and print the number of NAs; none of the segmentation methods is compatible with NAs
+print(paste0("Number of NA in dataset: ", sum(is.na(spectra(msidata)[])), " - segmentation does not work with NA values"))
+
+@DATA_PROPERTIES_INRAM@
+
 
 ######################################## PDF ###################################
 ################################################################################
@@ -49,7 +54,7 @@
 #############################################################################
 grid.table(property_df, rows= NULL)
 
-if (npeaks > 0)
+if (npeaks > 0 && sum(is.na(spectra(msidata)[]))==0)
 {
 
 ######################## II) segmentation tools #############################
@@ -81,10 +86,25 @@
 
             component_vector = character()
             for (numberofcomponents in 1:$segm_cond.pca_ncomp)
-            {component_vector[numberofcomponents]= paste0("PC", numberofcomponents)}
+                {component_vector[numberofcomponents]= paste0("PC", numberofcomponents)}
+
             pca_result = PCA(msidata, ncomp=$segm_cond.pca_ncomp, column = component_vector, superpose = FALSE, 
             method = "$segm_cond.pca_method", scale = $segm_cond.pca_scale, layout = c(ncomp, 1))
 
+            ## remove msidata to clean up RAM space
+            rm(msidata)
+            gc()
+
+            ### table with the standard deviation of each principal component in pdf file
+            plot(0,type='n',axes=FALSE,ann=FALSE)
+            sd_table = as.data.frame(round(pca_result@resultData\$ncomp\$sdev, digits=2))
+            colnames(sd_table) = "Standard deviation"
+            PC_vector = character()
+            for (PCs in 1:$segm_cond.pca_ncomp){
+                PC_vector[[PCs]] = c(paste0("PC",PCs))}
+            sd_table = cbind(PC_vector, sd_table)
+            colnames(sd_table)[1] = "Principal components"
+            grid.table(sd_table, rows=NULL)
             ### images in pdf file
             print(image(pca_result, main="PCA image", lattice=lattice_input, strip = strip_input, col=colourvector, ylim=c(maximumy+2, minimumy-2)))
             for (PCs in 1:$segm_cond.pca_ncomp){
@@ -95,10 +115,10 @@
                 print(plot(pca_result, column = c(paste0("PC",PCs)),superpose = FALSE))}
 
             ### values in tabular files
-            pcaloadings = (pca_result@resultData\$ncomp\$loadings) ### loading for each m/z value
+            pcaloadings = formatC(pca_result@resultData\$ncomp\$loadings, format = "e", digits = 6)### loading for each m/z value
             pcaloadings2 = cbind(matrix(unlist(strsplit(rownames(pcaloadings), " = ")), ncol=2, byrow=TRUE)[,2], pcaloadings)
             colnames(pcaloadings2) = c("mz", colnames(pcaloadings))
-            pcascores = (pca_result@resultData\$ncomp\$scores) ### scores for each pixel
+            pcascores = round(pca_result@resultData\$ncomp\$scores, digits=6) ### scores for each pixel
 
             ## pixel names and coordinates
             ## to remove potential sample names and z dimension, split at comma and take only x and y 
@@ -125,6 +145,11 @@
             ##k-means
 
             skm = spatialKMeans(msidata, r=c($segm_cond.kmeans_r), k=c($segm_cond.kmeans_k), method="$segm_cond.kmeans_method")
+
+            ## remove msidata to clean up RAM space
+            rm(msidata)
+            gc()
+
             print(image(skm, key=TRUE, main="K-means clustering", lattice=lattice_input, strip=strip_input, col= colourvector, layout=c(1,1), ylim=c(maximumy+2, minimumy-2)))
 
             print(plot(skm, main="K-means plot", lattice=lattice_input, col= colourvector, strip=strip_input, layout=c(1,1)))
@@ -162,6 +187,9 @@
             ##centroids
 
             ssc = spatialShrunkenCentroids(msidata, r=c($segm_cond.centroids_r), k=c($segm_cond.centroids_k), s=c($segm_cond.centroids_s), method="$segm_cond.centroids_method")
+            ## remove msidata to clean up RAM space
+            rm(msidata)
+            gc()
             print(image(ssc, key=TRUE, main="Spatial shrunken centroids", lattice=lattice_input, strip = strip_input, col= colourvector,layout=c(1,1), ylim=c(maximumy+2, minimumy-2)))
             print(plot(ssc, main="Spatial shrunken centroids plot", lattice=lattice_input, col= colourvector, strip = strip_input,layout=c(1,1)))
             print(plot(ssc, mode = "tstatistics",key = TRUE, lattice=lattice_input, layout = c(1,1), main="t-statistics", col=colourvector))
@@ -274,7 +302,7 @@
                 <when value="lattice_image"/>
             </conditional>
             <repeat name="colours" title="Colours for the plots" min="1" max="50">
-                <param name="feature_color" type="color" label="Colours" value="#ff00ff" help="Numbers of columns should be the same as number of components">
+                <param name="feature_color" type="color" label="Colours" value="#ff00ff" help="Numbers of colours should be the same as number of components">
                   <sanitizer>
                     <valid initial="string.letters,string.digits">
                       <add value="#" />
@@ -286,7 +314,7 @@
         <param name="setseed" type="integer" value="1" label="set seed" help="Use same value to reproduce previous results"/>
     </inputs>
     <outputs>
-        <data format="pdf" name="segmentationimages" from_work_dir="segmentationpdf.pdf" label = "${tool.name} on ${on_string}"/>
+        <data format="pdf" name="segmentationimages" from_work_dir="segmentationpdf.pdf" label = "${tool.name} on ${on_string}: results"/>
         <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features"/>
         <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels"/>
         <data format="rdata" name="segmentation_rdata" label="${tool.name} on ${on_string}: results.RData">
@@ -305,7 +333,14 @@
                 <param name="feature_color" value="#0000FF"/>
             </repeat>
             <output name="segmentationimages" file="pca_imzml.pdf" compare="sim_size"/>
-            <output name="mzfeatures" file="loadings_pca.tabular"/>
+            <output name="mzfeatures">
+                <assert_contents>
+                    <has_text text="300.17" />
+                    <has_text text="-4.234458e-04" />
+                    <has_text text="3.878545e-10" />
+                    <has_n_columns n="3" />
+                </assert_contents>
+            </output>
             <output name="pixeloutput" file="scores_pca.tabular"/>
         </test>
         <test>