diff msi_preprocessing.xml @ 9:4d5578b57a77 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 5feaf3d0e0da8cef1241fecc1f4d6f81324594e6
author galaxyp
date Wed, 22 Aug 2018 13:43:04 -0400
parents d77c5228fd1a
children df8d7f6f210b
line wrap: on
line diff
--- a/msi_preprocessing.xml	Tue Jul 24 04:53:10 2018 -0400
+++ b/msi_preprocessing.xml	Wed Aug 22 13:43:04 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.5">
+<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.6">
     <description>
         mass spectrometry imaging preprocessing
     </description>
@@ -45,7 +45,11 @@
 #elif $infile.ext == 'analyze75'
     msidata = readAnalyze('infile', attach.only=TRUE)
 #else
-    load('infile.RData')
+    loadRData <- function(fileName){ ## returns the object saved in the .RData file 'fileName', whatever its variable name
+    loaded_env <- new.env(); loaded <- load(fileName, envir=loaded_env) ## own environment, so loaded names cannot overwrite fileName
+    get(if ("msidata" %in% loaded) "msidata" else loaded[1], envir=loaded_env) ## several objects saved: prefer "msidata", else take the first
+    }
+    msidata = loadRData('infile.RData')
 #end if
 
 print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[]))))
@@ -64,8 +68,9 @@
         maxfeatures = length(features(msidata))
         medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
         medint = round(median(spectra(msidata)[],na.rm=TRUE), digits=2)
-        TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
-        QC_numbers= data.frame(inputdata = c(maxfeatures, medianpeaks, medint, TICs))
+        minmz = round(min(mz(msidata)), digits=2)
+        maxmz = round(max(mz(msidata)), digits=2)
+        QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medianpeaks, medint))
         vectorofactions = "inputdata"
 
     ############################### Preprocessing steps ###########################
@@ -86,8 +91,9 @@
                 maxfeatures = length(features(msidata))
                 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE),)
                 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
-                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
-                normalized = c(maxfeatures, medianpeaks, medint, TICs)
+                minmz = round(min(mz(msidata)), digits=2)
+                maxmz = round(max(mz(msidata)), digits=2)
+                normalized = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
                 QC_numbers= cbind(QC_numbers, normalized)
                 vectorofactions = append(vectorofactions, "normalized")
 
@@ -104,8 +110,9 @@
                 maxfeatures = length(features(msidata))
                 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
                 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
-                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
-                baseline= c(maxfeatures, medianpeaks, medint, TICs)
+                minmz = round(min(mz(msidata)), digits=2)
+                maxmz = round(max(mz(msidata)), digits=2)
+                baseline = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
                 QC_numbers= cbind(QC_numbers, baseline)
                 vectorofactions = append(vectorofactions, "baseline red.")
 
@@ -136,8 +143,9 @@
                 maxfeatures = length(features(msidata))
                 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
                 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
-                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
-                smoothed= c(maxfeatures, medianpeaks, medint, TICs)
+                minmz = round(min(mz(msidata)), digits=2)
+                maxmz = round(max(mz(msidata)), digits=2)
+                smoothed = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
                 QC_numbers= cbind(QC_numbers, smoothed)
                 vectorofactions = append(vectorofactions, "smoothed")
 
@@ -147,7 +155,6 @@
             print('Peak_picking')
             ## Peakpicking
 
-
             ## remove duplicated coordinates, otherwise peak picking will fail
             print(paste0(sum(duplicated(coord(msidata))), " coordinates were removed"))
             msidata <- msidata[,!duplicated(coord(msidata))]
@@ -174,8 +181,9 @@
                 maxfeatures = length(features(msidata))
                 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
                 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
-                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
-                picked= c(maxfeatures, medianpeaks, medint, TICs)
+                minmz = round(min(mz(msidata)), digits=2)
+                maxmz = round(max(mz(msidata)), digits=2)
+                picked = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
                 QC_numbers= cbind(QC_numbers, picked)
                 vectorofactions = append(vectorofactions, "picked")
 
@@ -220,8 +228,9 @@
                 maxfeatures = length(features(msidata))
                 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
                 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
-                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
-                aligned= c(maxfeatures, medianpeaks, medint, TICs)
+                minmz = round(min(mz(msidata)), digits=2)
+                maxmz = round(max(mz(msidata)), digits=2)
+                aligned = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
                 QC_numbers= cbind(QC_numbers, aligned)
                 vectorofactions = append(vectorofactions, "aligned")
 
@@ -237,8 +246,9 @@
                 maxfeatures = length(features(msidata))
                 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
                 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
-                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
-                filtered= c(maxfeatures, medianpeaks, medint, TICs)
+                minmz = round(min(mz(msidata)), digits=2)
+                maxmz = round(max(mz(msidata)), digits=2)
+                filtered = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
                 QC_numbers= cbind(QC_numbers, filtered)
                 vectorofactions = append(vectorofactions, "filtered")
 
@@ -279,8 +289,9 @@
                 maxfeatures = length(features(msidata))
                 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
                 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
-                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
-                reduced= c(maxfeatures, medianpeaks, medint, TICs)
+                minmz = round(min(mz(msidata)), digits=2)
+                maxmz = round(max(mz(msidata)), digits=2)
+                reduced = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
                 QC_numbers= cbind(QC_numbers, reduced)
                 vectorofactions = append(vectorofactions, "reduced")
 
@@ -308,8 +319,9 @@
                 maxfeatures = length(features(msidata))
                 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
                 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
-                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
-                transformed= c(maxfeatures, medianpeaks, medint, TICs)
+                minmz = round(min(mz(msidata)), digits=2)
+                maxmz = round(max(mz(msidata)), digits=2)
+                transformed = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
                 QC_numbers= cbind(QC_numbers, transformed)
                 vectorofactions = append(vectorofactions, "transformed")
 
@@ -436,7 +448,7 @@
     pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12)
     plot(0,type='n',axes=FALSE,ann=FALSE)
     title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name"))
-    rownames(QC_numbers) = c("# features", "median # peaks", "median intensity", "median TIC")
+    rownames(QC_numbers) = c("min m/z", "max m/z", "# features", "median\n# peaks", "median\nintensity") ## labels follow the order of the QC vectors: minmz, maxmz, maxfeatures, medianpeaks, medint
     grid.table(t(QC_numbers))
 
     #if str($tabular_annotation.load_annotation) == 'yes_annotation':
@@ -891,18 +903,19 @@
 
 This tool provides provides multiple Cardinal functions to preprocess mass spectrometry imaging data. 
 
-Input data: 3 types of input data can be used:
+Input data: 3 types of MSI data can be used:
 
 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
+- Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates, which will be automatically removed before peak picking.
 - optional: tabular file with pixel annotations: x and y values in separate columns and the corresponding annotation in a third column
 
 Options:
 
 - Normalization: Normalization of intensities to total ion current (TIC)
-- Baseline reduction: Baseline  reduction removes backgroundintensity generated by chemical noise (common in MALDI datasets)
-- Smoothening: Smoothing of the peaks reduces noise and improves peak detection
+- Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets)
+- Smoothing: Smoothing of the peaks reduces noise and improves peak detection
 - Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards)
 - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value
 - Peak filtering: works only on centroided data (after peak picking and alignment or data reduction with peak filtering), removes peaks that occur only in a small proportion of pixels. If not sure which cutoff to chose run qualitycontrol first and decide according to the zero value plot.
@@ -914,7 +927,7 @@
 
 - imzML file, preprocessed
 - pdf with key values after each processing step, in case of loaded annotations file overview plot of pixel annotations
-- optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns)
+- optional: intensity matrix as tabular file (m/z in rows and pixels in columns, filled with intensity values)
 - optional: summarized intensity matrix: mean, median or standard deviation for each m/z feature; in case pixel annotations are provided the intensity values are summarized for each pixel group
 
 Tip: