comparison segmentation.xml @ 4:9f7d1ec01767 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit ecdc3a64aa245d80dbc5487b2bf10a85a43adc6d
author galaxyp
date Fri, 22 Mar 2019 08:15:15 -0400
parents 09b638ceee45
children 4a2ac25d1063
comparison
equal deleted inserted replaced
3:09b638ceee45 4:9f7d1ec01767
1 <tool id="cardinal_segmentations" name="MSI segmentation" version="@VERSION@.2"> 1 <tool id="cardinal_segmentations" name="MSI segmentation" version="@VERSION@.3">
2 <description>mass spectrometry imaging spatial clustering</description> 2 <description>mass spectrometry imaging spatial clustering</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"> 6 <expand macro="requirements">
28 28
29 29
30 30
31 @READING_MSIDATA_INRAM@ 31 @READING_MSIDATA_INRAM@
32 32
33 ## to make sure that processed files work as well:
34 iData(msidata) = iData(msidata)[]
35 33
36 ## remove duplicated coordinates 34 ## remove duplicated coordinates
37 print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed"))
38 msidata <- msidata[,!duplicated(coord(msidata))] 35 msidata <- msidata[,!duplicated(coord(msidata))]
39 36
40 ## count and print number of NAs, all methods are not compatible with NAs
41 print(paste0("Number of NA in dataset: ", sum(is.na(spectra(msidata)[])), " - segmentation does not work with NA values"))
42 37
43 @DATA_PROPERTIES_INRAM@ 38 @DATA_PROPERTIES_INRAM@
44 39
45 40
46 ######################################## PDF ################################### 41 ######################################## PDF ###################################
56 51
57 ############################# I) numbers #################################### 52 ############################# I) numbers ####################################
58 ############################################################################# 53 #############################################################################
59 grid.table(property_df, rows= NULL) 54 grid.table(property_df, rows= NULL)
60 55
61 if (npeaks > 0 && sum(is.na(spectra(msidata)[]))==0) 56 if (npeaks > 0 && sum(is.na(spectra(msidata)))==0)
62 { 57 {
63 58
64 ######################## II) segmentation tools ############################# 59 ######################## II) segmentation tools #############################
65 ############################################################################# 60 #############################################################################
66 #set $color_string = ','.join(['"%s"' % $color.feature_color for $color in $colours]) 61 #set $color_string = ','.join(['"%s"' % $color.feature_color for $color in $colours])
67 colourvector = c($color_string) 62 colourvector = c($color_string)
68 63
69 ### preparation for images and plots: 64 ### preparation for images and plots:
70 #if str($image_cond.image_type) == "standard_image": 65 #if str($image_type) == "standard_image":
71 print("standard image") 66 print("standard image")
72 67
73 strip_input = TRUE 68 strip_input = FALSE
74 lattice_input = FALSE 69 lattice_input = FALSE
75 70
76 #elif str($image_cond.image_type) == "lattice_image": 71 #elif str($image_type) == "lattice_image":
77 print("lattice image") 72 print("lattice image")
78 73
79 strip_input = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9)) 74 strip_input = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.9))
80 lattice_input = TRUE 75 lattice_input = TRUE
81 76
110 colnames(sd_table)[1] = "Principal components" 105 colnames(sd_table)[1] = "Principal components"
111 grid.table(sd_table, rows=NULL) 106 grid.table(sd_table, rows=NULL)
112 ### images in pdf file 107 ### images in pdf file
113 print(image(pca_result, main="PCA image", lattice=lattice_input, strip = strip_input, col=colourvector, ylim=c(maximumy+2, minimumy-2))) 108 print(image(pca_result, main="PCA image", lattice=lattice_input, strip = strip_input, col=colourvector, ylim=c(maximumy+2, minimumy-2)))
114 for (PCs in 1:$segm_cond.pca_ncomp){ 109 for (PCs in 1:$segm_cond.pca_ncomp){
115 print(image(pca_result, column = c(paste0("PC",PCs)), lattice=lattice_input, superpose = FALSE, col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))} 110 print(image(pca_result, column = c(paste0("PC",PCs)), lattice=lattice_input,strip = strip_input, superpose = FALSE, main=paste0("PC", PCs), col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))}
116 ### plots in pdf file 111 ### plots in pdf file
117 print(plot(pca_result, main="PCA plot", lattice=lattice_input, col= colourvector, strip = strip_input)) 112 print(plot(pca_result, main="PCA plot", lattice=lattice_input, col= colourvector, strip = strip_input))
118 for (PCs in 1:$segm_cond.pca_ncomp){ 113 for (PCs in 1:$segm_cond.pca_ncomp){
119 print(plot(pca_result, column = c(paste0("PC",PCs)),superpose = FALSE))} 114 print(plot(pca_result, column = c(paste0("PC",PCs)),main=paste0("PC", PCs),strip = FALSE,superpose = FALSE))}
120 115
121 ### values in tabular files 116 ### values in tabular files
122 pcaloadings = formatC(pca_result@resultData\$ncomp\$loadings, format = "e", digits = 6)### loading for each m/z value 117 pcaloadings = formatC(pca_result@resultData\$ncomp\$loadings, format = "e", digits = 6)### loading for each m/z value
123 pcaloadings2 = cbind(matrix(unlist(strsplit(rownames(pcaloadings), " = ")), ncol=2, byrow=TRUE)[,2], pcaloadings) 118 pcaloadings2 = cbind(matrix(unlist(strsplit(rownames(pcaloadings), " = ")), ncol=2, byrow=TRUE)[,2], pcaloadings)
124 colnames(pcaloadings2) = c("mz", colnames(pcaloadings)) 119 colnames(pcaloadings2) = c("mz", colnames(pcaloadings))
153 ## remove msidata to clean up RAM space 148 ## remove msidata to clean up RAM space
154 rm(msidata) 149 rm(msidata)
155 gc() 150 gc()
156 151
157 print(image(skm, key=TRUE, main="K-means clustering", lattice=lattice_input, strip=strip_input, col= colourvector, layout=c(1,1), ylim=c(maximumy+2, minimumy-2))) 152 print(image(skm, key=TRUE, main="K-means clustering", lattice=lattice_input, strip=strip_input, col= colourvector, layout=c(1,1), ylim=c(maximumy+2, minimumy-2)))
158
159 print(plot(skm, main="K-means plot", lattice=lattice_input, col= colourvector, strip=strip_input, layout=c(1,1))) 153 print(plot(skm, main="K-means plot", lattice=lattice_input, col= colourvector, strip=strip_input, layout=c(1,1)))
160 154
161 skm_clusters = data.frame(matrix(NA, nrow = pixelcount, ncol = 0)) 155 skm_clusters = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
162 for (iteration in 1:length(skm@resultData)){ 156 for (iteration in 1:length(skm@resultData)){
163 skm_cluster = ((skm@resultData)[[iteration]]\$cluster) 157 skm_cluster = ((skm@resultData)[[iteration]]\$cluster)
192 186
193 ssc = spatialShrunkenCentroids(msidata, r=c($segm_cond.centroids_r), k=c($segm_cond.centroids_k), s=c($segm_cond.centroids_s), method="$segm_cond.centroids_method") 187 ssc = spatialShrunkenCentroids(msidata, r=c($segm_cond.centroids_r), k=c($segm_cond.centroids_k), s=c($segm_cond.centroids_s), method="$segm_cond.centroids_method")
194 ## remove msidata to clean up RAM space 188 ## remove msidata to clean up RAM space
195 rm(msidata) 189 rm(msidata)
196 gc() 190 gc()
197 print(image(ssc, key=TRUE, main="Spatial shrunken centroids", lattice=lattice_input, strip = strip_input, col= colourvector,layout=c(1,1), ylim=c(maximumy+2, minimumy-2))) 191 print(image(ssc, key=TRUE, main="Spatial shrunken centroids", lattice=lattice_input, strip = TRUE, col= colourvector,layout=c(1,1), ylim=c(maximumy+2, minimumy-2)))
198 print(plot(ssc, main="Spatial shrunken centroids plot", lattice=lattice_input, col= colourvector, strip = strip_input,layout=c(1,1))) 192 print(plot(ssc, main="Spatial shrunken centroids plot", lattice=lattice_input, col= colourvector, strip = TRUE,layout=c(1,1)))
199 print(plot(ssc, mode = "tstatistics",key = TRUE, lattice=lattice_input, layout = c(1,1), main="t-statistics", col=colourvector)) 193 print(plot(ssc, mode = "tstatistics",key = TRUE, lattice=lattice_input, layout = c(1,1), main="t-statistics", col=colourvector))
194
200 plot(summary(ssc), main = "Number of segments") 195 plot(summary(ssc), main = "Number of segments")
201 196
202 ssc_classes = data.frame(matrix(NA, nrow = pixelcount, ncol = 0)) 197 ssc_classes = data.frame(matrix(NA, nrow = pixelcount, ncol = 0))
203 for (iteration in 1:length(ssc@resultData)){ 198 for (iteration in 1:length(ssc@resultData)){
204 ssc_class = ((ssc@resultData)[[iteration]]\$classes) 199 ssc_class = ((ssc@resultData)[[iteration]]\$classes)
228 #end if 223 #end if
229 224
230 #end if 225 #end if
231 226
232 dev.off() 227 dev.off()
228
229 ## optional svg output with original coordinates
230 #if $svg_pixelimage:
231 print("svg image")
232 ## reverse y axis for svg output = correct order and nice svg image
233
234
235 svg(file="svg_pixel_output.svg", width=maximumx, height=maximumy)
236 par(mar=c(0,0,0,0))
237 #if str( $segm_cond.segmentationtool ) == 'pca':
238 coord(pca_result)\$y <- max(coord(pca_result)\$y) - coord(pca_result)\$y + 1
239 image(pca_result, strip = FALSE, colorkey=FALSE, axes=FALSE, xlab=NA, ylab=NA, col=colourvector)
240 #elif str( $segm_cond.segmentationtool ) == 'kmeans':
241 coord(skm)\$y <- max(coord(skm)\$y) - coord(skm)\$y + 1
242 image(skm, key=FALSE, strip=FALSE, col= colourvector)
243 #elif str( $segm_cond.segmentationtool ) == 'centroids':
244 coord(ssc)\$y <- max(coord(ssc)\$y) - coord(ssc)\$y + 1
245 image(ssc, key=FALSE, strip = FALSE, col= colourvector)
246 #end if
247 dev.off()
248 #end if
249
233 250
234 }else{ 251 }else{
235 print("Inputfile has no intensities > 0") 252 print("Inputfile has no intensities > 0")
236 dev.off() 253 dev.off()
237 } 254 }
254 <option value="irlba" selected="True">irlba</option> 271 <option value="irlba" selected="True">irlba</option>
255 <option value="svd">svd</option> 272 <option value="svd">svd</option>
256 </param> 273 </param>
257 <param name="pca_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Scaling of data before analysis"/> 274 <param name="pca_scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Scaling of data before analysis"/>
258 </when> 275 </when>
259
260 <when value="kmeans"> 276 <when value="kmeans">
261 <param name="kmeans_r" type="text" value="2" 277 <param name="kmeans_r" type="text" value="2"
262 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"> 278 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)">
263 <expand macro="sanitizer_multiple_digits"/> 279 <expand macro="sanitizer_multiple_digits"/>
264 </param> 280 </param>
295 </param> 311 </param>
296 <param name="centroids_toplabels" type="integer" value="500" 312 <param name="centroids_toplabels" type="integer" value="500"
297 label="Number of toplabels (m/z) which should be written in tabular output"/> 313 label="Number of toplabels (m/z) which should be written in tabular output"/>
298 </when> 314 </when>
299 </conditional> 315 </conditional>
300 <conditional name="image_cond"> 316 <param name="image_type" type="boolean" checked="True" truevalue="standard_image" falsevalue="lattice_image"
301 <param name="image_type" type="select" label="Select the image type"> 317 label="Standard image" help="No: lattice function is used to display image"/>
302 <option value="standard_image" selected="True">standard</option> 318 <param name="svg_pixelimage" type="boolean" label="Export first segmentation image as svg"/>
303 <option value="lattice_image">lattice</option>
304 </param>
305 <when value="standard_image"/>
306 <when value="lattice_image"/>
307 </conditional>
308 <repeat name="colours" title="Colours for the plots" min="1" max="50"> 319 <repeat name="colours" title="Colours for the plots" min="1" max="50">
309 <param name="feature_color" type="color" label="Colours" value="#ff00ff" help="Numbers of colours should be the same as number of components"> 320 <param name="feature_color" type="color" label="Colours" value="#ff00ff" help="Numbers of colours should be the same as number of components">
310 <sanitizer> 321 <sanitizer>
311 <valid initial="string.letters,string.digits"> 322 <valid initial="string.letters,string.digits">
312 <add value="#" /> 323 <add value="#" />
321 <data format="pdf" name="segmentationimages" from_work_dir="segmentationpdf.pdf" label = "${tool.name} on ${on_string}: results"/> 332 <data format="pdf" name="segmentationimages" from_work_dir="segmentationpdf.pdf" label = "${tool.name} on ${on_string}: results"/>
322 <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features"/> 333 <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features"/>
323 <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels"/> 334 <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels"/>
324 <data format="rdata" name="segmentation_rdata" label="${tool.name} on ${on_string}: results.RData"> 335 <data format="rdata" name="segmentation_rdata" label="${tool.name} on ${on_string}: results.RData">
325 <filter>output_rdata</filter> 336 <filter>output_rdata</filter>
337 </data>
338 <data format="svg" name="svg_output" from_work_dir="svg_pixel_output.svg" label="${tool.name} on ${on_string}: image.svg">
339 <filter>svg_pixelimage</filter>
326 </data> 340 </data>
327 </outputs> 341 </outputs>
328 <tests> 342 <tests>
329 <test> 343 <test>
330 <expand macro="infile_imzml"/> 344 <expand macro="infile_imzml"/>
402 ----- 416 -----
403 417
404 This tool provides three different Cardinal functions for unsupervised clustering/spatial segmentation of mass spectrometry imaging data. 418 This tool provides three different Cardinal functions for unsupervised clustering/spatial segmentation of mass spectrometry imaging data.
405 419
406 @MSIDATA_INPUT_DESCRIPTION@ 420 @MSIDATA_INPUT_DESCRIPTION@
421 - NA intensities are not allowed
422 - duplicated coordinates will be removed
423
407 424
408 **Options** 425 **Options**
409 426
410 - PCA: principal component analysis 427 - PCA: principal component analysis
411 - k-means: spatially-aware k-means clustering 428 - k-means: spatially-aware k-means clustering (adopted from `Alexandrov and Kobarg <https://doi.org/10.1093/bioinformatics/btr246>`_)
412 - spatial shrunken centroids: Allows the number of segments to decrease according to the data. This allows automatic selection of the number of clusters 429 - spatial shrunken centroids: Allows the number of segments to decrease according to the data. This allows selection of the number of clusters (more details in `Bemis et al. <https://doi.org/10.1074/mcp.O115.053918>`_)
413 430
414 **Output** 431 **Output**
415 432
416 - Pdf with the heatmaps and plots for the segmentation 433 - Pdf with the heatmaps and plots for the segmentation
417 - Tabular file with information on m/z and pixels: loadings/scores (PCA), toplabels/clusters (k-means), toplabels/classes (spatial shrunken centroids) 434 - Tabular file with information on m/z and pixels: loadings/scores (PCA), toplabels/clusters (k-means), toplabels/classes (spatial shrunken centroids)
418 - Optional .RData file which contains the segmentation results and can be used for further exploration in R using the Cardinal package 435 - Optional .RData file which contains the segmentation results and can be used for further exploration in R using the Cardinal package
436 - Optional: svg file with the first segmentation image
419 437
420 ]]> 438 ]]>
421 </help> 439 </help>
422 <expand macro="citations"/> 440 <expand macro="citations"/>
423 </tool> 441 </tool>