changeset 7:1a3d477bc54a draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 8087490eb4dcaf4ead0f03eae4126780d21e5503
author galaxyp
date Fri, 06 Jul 2018 14:13:48 -0400
parents d3fd539f477e
children d77c5228fd1a
files msi_preprocessing.xml test-data/preprocessing_results1.RData test-data/preprocessing_results1.pdf test-data/preprocessing_results1.txt test-data/preprocessing_results2.pdf test-data/preprocessing_results3.RData test-data/preprocessing_results3.pdf test-data/preprocessing_results4.RData test-data/preprocessing_results4.pdf test-data/preprocessing_results4.txt test-data/preprocessing_results5.RData test-data/preprocessing_results5.pdf
diffstat 12 files changed, 299 insertions(+), 292 deletions(-) [+]
line wrap: on
line diff
--- a/msi_preprocessing.xml	Thu Jun 21 16:45:55 2018 -0400
+++ b/msi_preprocessing.xml	Fri Jul 06 14:13:48 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.3">
+<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.4">
     <description>
         mass spectrometry imaging preprocessing
     </description>
@@ -38,14 +38,16 @@
     #if str($processed_cond.processed_file) == "processed":
         msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units")
     #else
-        msidata <- readImzML('infile')
+        msidata <- readImzML('infile', attach.only=TRUE)
     #end if
 #elif $infile.ext == 'analyze75'
-    msidata = readAnalyze('infile')
+    msidata = readAnalyze('infile', attach.only=TRUE)
 #else
     load('infile.RData')
 #end if
 
+print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[]))))
+
 ## function to later read RData reference files in
 
 loadRData <- function(fileName){
@@ -54,357 +56,358 @@
 get(ls()[ls() != "fileName"])
 }
 
-######################### preparations for QC report #################
-
-    maxfeatures = length(features(msidata))
-    medianpeaks = median(colSums(spectra(msidata)[]>0))
-    medint = round(median(spectra(msidata)[]), digits=2)
-    TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
-    QC_numbers= data.frame(inputdata = c(maxfeatures, medianpeaks, medint, TICs))
-    vectorofactions = "inputdata"
-
-############################### Preprocessing steps ###########################
-###############################################################################
+if (sum(spectra(msidata)[]>0, na.rm=TRUE)> 0){
+    ######################### preparations for QC report #################
 
-#for $method in $methods:
-
-############################### Normalization ###########################
+        maxfeatures = length(features(msidata))
+        medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+        medint = round(median(spectra(msidata)[],na.rm=TRUE), digits=2)
+        TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+        QC_numbers= data.frame(inputdata = c(maxfeatures, medianpeaks, medint, TICs))
+        vectorofactions = "inputdata"
 
-    #if str( $method.methods_conditional.preprocessing_method ) == 'Normalization':
-        print('Normalization')
-        ##normalization
+    ############################### Preprocessing steps ###########################
+    ###############################################################################
 
-        msidata = normalize(msidata, method="tic")
+    #for $method in $methods:
 
-        ############################### QC ###########################
+    ############################### Normalization ###########################
 
-            maxfeatures = length(features(msidata))
-            medianpeaks = median(colSums(spectra(msidata)[]>0))
-            medint = round(median(spectra(msidata)[]), digits=2)
-            TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
-            normalized = c(maxfeatures, medianpeaks, medint, TICs)
-            QC_numbers= cbind(QC_numbers, normalized)
-            vectorofactions = append(vectorofactions, "normalized")
+        #if str( $method.methods_conditional.preprocessing_method ) == 'Normalization':
+            print('Normalization')
+            ##normalization
 
-############################### Baseline reduction ###########################
+            msidata = normalize(msidata, method="tic")
+
+            ############################### QC ###########################
 
-    #elif str( $method.methods_conditional.preprocessing_method ) == 'Baseline_reduction':
-        print('Baseline_reduction')
-        ##baseline reduction
-
-        msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline)
-
-        ############################### QC ###########################
+                maxfeatures = length(features(msidata))
+                medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE),)
+                medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+                normalized = c(maxfeatures, medianpeaks, medint, TICs)
+                QC_numbers= cbind(QC_numbers, normalized)
+                vectorofactions = append(vectorofactions, "normalized")
 
-            maxfeatures = length(features(msidata))
-            medianpeaks = median(colSums(spectra(msidata)[]>0))
-            medint = round(median(spectra(msidata)[]), digits=2)
-            TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
-            baseline= c(maxfeatures, medianpeaks, medint, TICs)
-            QC_numbers= cbind(QC_numbers, baseline)
-            vectorofactions = append(vectorofactions, "baseline red.")
+    ############################### Baseline reduction ###########################
 
-############################### Smoothing ###########################
+        #elif str( $method.methods_conditional.preprocessing_method ) == 'Baseline_reduction':
+            print('Baseline_reduction')
+            ##baseline reduction
 
-    #elif str( $method.methods_conditional.preprocessing_method ) == 'Smoothing':
-        print('Smoothing')
-        ## Smoothing
+            msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline)
 
-        #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian':
-            print('gaussian smoothing')
-
-            msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian)
+            ############################### QC ###########################
 
-        #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay':
-            print('sgolay smoothing')
+                maxfeatures = length(features(msidata))
+                medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+                medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+                baseline= c(maxfeatures, medianpeaks, medint, TICs)
+                QC_numbers= cbind(QC_numbers, baseline)
+                vectorofactions = append(vectorofactions, "baseline red.")
 
-            msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters)
-        #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma':
-            print('sgolay smoothing')
-
-            msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter)
-
-        #end if
-
-        ############################### QC ###########################
+    ############################### Smoothing ###########################
 
-            maxfeatures = length(features(msidata))
-            medianpeaks = median(colSums(spectra(msidata)[]>0))
-            medint = round(median(spectra(msidata)[]), digits=2)
-            TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
-            smoothed= c(maxfeatures, medianpeaks, medint, TICs)
-            QC_numbers= cbind(QC_numbers, smoothed)
-            vectorofactions = append(vectorofactions, "smoothed")
+        #elif str( $method.methods_conditional.preprocessing_method ) == 'Smoothing':
+            print('Smoothing')
+            ## Smoothing
 
-############################### Peak picking ###########################
+            #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian':
+                print('gaussian smoothing')
 
-    #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking':
-        print('Peak_picking')
-        ## Peakpicking
+                msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian)
 
-        #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive':
-            print('adaptive peakpicking')
-
-            msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, spar=$method.methods_conditional.methods_for_picking.spar_picking)
+            #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay':
+                print('sgolay smoothing')
 
-        #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'limpic':
-            print('limpic peakpicking')
-
-            msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, thresh=$method.methods_conditional.methods_for_picking.tresh_picking)
+                msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters)
+            #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma':
+                print('sgolay smoothing')
 
-        #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'simple':
-            print('simple peakpicking')
-
-            msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method)
+                msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter)
 
-        #end if
-
-        ############################### QC ###########################
+            #end if
 
-            maxfeatures = length(features(msidata))
-            medianpeaks = median(colSums(spectra(msidata)[]>0))
-            medint = round(median(spectra(msidata)[]), digits=2)
-            TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
-            picked= c(maxfeatures, medianpeaks, medint, TICs)
-            QC_numbers= cbind(QC_numbers, picked)
-            vectorofactions = append(vectorofactions, "picked")
+            ############################### QC ###########################
 
-############################### Peak alignment ###########################
-
-    #elif str( $method.methods_conditional.preprocessing_method ) == 'Peak_alignment':
-        print('Peak_alignment')
-        ## Peakalignment
+                maxfeatures = length(features(msidata))
+                medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+                medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+                smoothed= c(maxfeatures, medianpeaks, medint, TICs)
+                QC_numbers= cbind(QC_numbers, smoothed)
+                vectorofactions = append(vectorofactions, "smoothed")
 
-        #if str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_noref':
+    ############################### Peak picking ###########################
 
-            align_peak_reference = msidata
-
-        #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_table':
+        #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking':
+            print('Peak_picking')
+            ## Peakpicking
 
-            align_reference_table = read.delim("$method.methods_conditional.align_ref_type.align_peaks_table", header = FALSE, stringsAsFactors = FALSE)
-            align_reference_column = align_reference_table[,$method.methods_conditional.align_ref_type.align_mass_column]
-            align_peak_reference = align_reference_column[align_reference_column>=min(mz(msidata)) & align_reference_column<=max(mz(msidata))]
-            if (length(align_peak_reference) == 0)
-                {align_peak_reference = 0}
-       
-        #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_msidata_ref':
 
-                align_peak_reference = loadRData('$method.methods_conditional.align_ref_type.align_peaks_msidata')
+            ## remove duplicated coordinates, otherwise peak picking will fail
+            print(paste0(sum(duplicated(coord(msidata))), " coordinates were removed"))
+            msidata <- msidata[,!duplicated(coord(msidata))]
 
-        #end if
+            #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive':
+                print('adaptive peakpicking')
 
-        #if str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'diff':
-            print('diff peakalignment')
+                msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, spar=$method.methods_conditional.methods_for_picking.spar_picking)
 
-            msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',diff.max =$method.methods_conditional.methods_for_alignment.value_diffalignment, units = "$method.methods_conditional.methods_for_alignment.units_diffalignment", ref=align_peak_reference)
+            #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'limpic':
+                print('limpic peakpicking')
 
-       #elif str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'DP':
-            print('DPpeakalignment')
+                msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, thresh=$method.methods_conditional.methods_for_picking.tresh_picking)
 
-        msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',gap = $method.methods_conditional.methods_for_alignment.gap_DPalignment, ref=align_peak_reference)
-
-       #end if
-
-        ############################### QC ###########################
+            #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'simple':
+                print('simple peakpicking')
 
-            maxfeatures = length(features(msidata))
-            medianpeaks = median(colSums(spectra(msidata)[]>0))
-            medint = round(median(spectra(msidata)[]), digits=2)
-            TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
-            aligned= c(maxfeatures, medianpeaks, medint, TICs)
-            QC_numbers= cbind(QC_numbers, aligned)
-            vectorofactions = append(vectorofactions, "aligned")
+                msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method)
 
-############################### Peak filtering ###########################
+            #end if
+
+            ############################### QC ###########################
 
-    #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_filtering':
-        print('Peak_filtering')
-
-        msidata = peakFilter(msidata, method='freq', freq.min = $method.methods_conditional.frequ_filtering)
-
-        ############################### QC ###########################
+                maxfeatures = length(features(msidata))
+                medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+                medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+                picked= c(maxfeatures, medianpeaks, medint, TICs)
+                QC_numbers= cbind(QC_numbers, picked)
+                vectorofactions = append(vectorofactions, "picked")
 
-            maxfeatures = length(features(msidata))
-            medianpeaks = median(colSums(spectra(msidata)[]>0))
-            medint = round(median(spectra(msidata)[]), digits=2)
-            TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
-            filtered= c(maxfeatures, medianpeaks, medint, TICs)
-            QC_numbers= cbind(QC_numbers, filtered)
-            vectorofactions = append(vectorofactions, "filtered")
+    ############################### Peak alignment ###########################
 
-############################### Data reduction ###########################
-
-    #elif str( $method.methods_conditional.preprocessing_method) == 'Data_reduction':
-        print('Data_reduction')
+        #elif str( $method.methods_conditional.preprocessing_method ) == 'Peak_alignment':
+            print('Peak_alignment')
+            ## Peakalignment
 
-        #if str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'bin':
-            print('bin reduction')
+            #if str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_noref':
 
-            msidata = reduceDimension(msidata, method="bin", width=$method.methods_conditional.methods_for_reduction.bin_width, units="$method.methods_conditional.methods_for_reduction.bin_units", fun=$method.methods_conditional.methods_for_reduction.bin_fun)
+                align_peak_reference = msidata
 
-        #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'resample':
-            print('resample reduction')
+            #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_table':
 
-            msidata = reduceDimension(msidata, method="resample", step=$method.methods_conditional.methods_for_reduction.resample_step)
-
-        #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'peaks':
-            print('peaks reduction')
-
-            #if str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'table':
+                align_reference_table = read.delim("$method.methods_conditional.align_ref_type.align_peaks_table", header = FALSE, stringsAsFactors = FALSE)
+                align_reference_column = align_reference_table[,$method.methods_conditional.align_ref_type.align_mass_column]
+                align_peak_reference = align_reference_column[align_reference_column>=min(mz(msidata)) & align_reference_column<=max(mz(msidata))]
+                if (length(align_peak_reference) == 0)
+                    {align_peak_reference = 0}
+           
+            #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_msidata_ref':
 
-                reference_table = read.delim("$method.methods_conditional.methods_for_reduction.ref_type.peaks_table", header = FALSE, stringsAsFactors = FALSE)
-                reference_column = reference_table[,$method.methods_conditional.methods_for_reduction.ref_type.mass_column]
-                peak_reference = reference_column[reference_column>min(mz(msidata)) & reference_column<max(mz(msidata))]
-
-            #elif str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'msidata_ref':
-
-                    peak_reference = loadRData('$method.methods_conditional.methods_for_reduction.ref_type.peaks_msidata')
+                    align_peak_reference = loadRData('$method.methods_conditional.align_ref_type.align_peaks_msidata')
 
             #end if
 
-            msidata = reduceDimension(msidata, method="peaks", ref=peak_reference, type="$method.methods_conditional.methods_for_reduction.peaks_type")
-        #end if
-        ############################### QC ###########################
+            #if str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'diff':
+                print('diff peakalignment')
+
+                msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',diff.max =$method.methods_conditional.methods_for_alignment.value_diffalignment, units = "$method.methods_conditional.methods_for_alignment.units_diffalignment", ref=align_peak_reference)
+
+           #elif str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'DP':
+                print('DPpeakalignment')
+
+            msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',gap = $method.methods_conditional.methods_for_alignment.gap_DPalignment, ref=align_peak_reference)
+
+           #end if
+
+            ############################### QC ###########################
 
-            maxfeatures = length(features(msidata))
-            medianpeaks = median(colSums(spectra(msidata)[]>0))
-            medint = round(median(spectra(msidata)[]), digits=2)
-            TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
-            reduced= c(maxfeatures, medianpeaks, medint, TICs)
-            QC_numbers= cbind(QC_numbers, reduced)
-            vectorofactions = append(vectorofactions, "reduced")
+                maxfeatures = length(features(msidata))
+                medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+                medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+                aligned= c(maxfeatures, medianpeaks, medint, TICs)
+                QC_numbers= cbind(QC_numbers, aligned)
+                vectorofactions = append(vectorofactions, "aligned")
 
-    ############################### Transformation ###########################
+    ############################### Peak filtering ###########################
 
-    #elif str( $method.methods_conditional.preprocessing_method) == 'Transformation':
-        print('Transformation')
+        #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_filtering':
+            print('Peak_filtering')
 
-        #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2':
-            print('log2 transformation')
+            msidata = peakFilter(msidata, method='freq', freq.min = $method.methods_conditional.frequ_filtering)
+
+            ############################### QC ###########################
 
-            spectra(msidata)[][spectra(msidata)[] ==0] = NA
-            print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra(msidata)[]))))
-            spectra(msidata)[] = log2(spectra(msidata)[])
+                maxfeatures = length(features(msidata))
+                medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+                medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+                filtered= c(maxfeatures, medianpeaks, medint, TICs)
+                QC_numbers= cbind(QC_numbers, filtered)
+                vectorofactions = append(vectorofactions, "filtered")
 
-        #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt':
-            print('squareroot transformation')
+    ############################### Data reduction ###########################
+
+        #elif str( $method.methods_conditional.preprocessing_method) == 'Data_reduction':
+            print('Data_reduction')
 
-            spectra(msidata)[] = sqrt(spectra(msidata)[])
+            #if str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'bin':
+                print('bin reduction')
+
+                msidata = reduceDimension(msidata, method="bin", width=$method.methods_conditional.methods_for_reduction.bin_width, units="$method.methods_conditional.methods_for_reduction.bin_units", fun=$method.methods_conditional.methods_for_reduction.bin_fun)
 
-       #end if
+            #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'resample':
+                print('resample reduction')
+
+                msidata = reduceDimension(msidata, method="resample", step=$method.methods_conditional.methods_for_reduction.resample_step)
 
-        ############################### QC ###########################
+            #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'peaks':
+                print('peaks reduction')
+
+                #if str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'table':
 
-            maxfeatures = length(features(msidata))
-            medianpeaks = median(colSums(spectra(msidata)[]>0), na.rm=TRUE)
-            medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
-            TICs = round(mean(colSums(spectra(msidata)[]), na.rm=TRUE), digits=1)
-            transformed= c(maxfeatures, medianpeaks, medint, TICs)
-            QC_numbers= cbind(QC_numbers, transformed)
-            vectorofactions = append(vectorofactions, "transformed")
+                    reference_table = read.delim("$method.methods_conditional.methods_for_reduction.ref_type.peaks_table", header = FALSE, stringsAsFactors = FALSE)
+                    reference_column = reference_table[,$method.methods_conditional.methods_for_reduction.ref_type.mass_column]
+                    peak_reference = reference_column[reference_column>min(mz(msidata)) & reference_column<max(mz(msidata))]
+
+                #elif str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'msidata_ref':
+
+                        peak_reference = loadRData('$method.methods_conditional.methods_for_reduction.ref_type.peaks_msidata')
+
+                #end if
 
-        #end if
-#end for
+                msidata = reduceDimension(msidata, method="peaks", ref=peak_reference, type="$method.methods_conditional.methods_for_reduction.peaks_type")
+            #end if
+            ############################### QC ###########################
+
+                maxfeatures = length(features(msidata))
+                medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+                medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+                reduced= c(maxfeatures, medianpeaks, medint, TICs)
+                QC_numbers= cbind(QC_numbers, reduced)
+                vectorofactions = append(vectorofactions, "reduced")
+
+        ############################### Transformation ###########################
 
-############# Outputs: summar matrix, RData, tabular and QC report #############
-################################################################################
-## optional summarized matrix
-    print('Summarized matrix')
+        #elif str( $method.methods_conditional.preprocessing_method) == 'Transformation':
+            print('Transformation')
+
+            #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2':
+                print('log2 transformation')
 
-#if "mean" in str($summary_type).split(","):
-    print("mean matrix")
-    if (!is.null(levels(msidata\$combined_sample))){
+                spectra(msidata)[][spectra(msidata)[] ==0] = NA
+                print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra(msidata)[]))))
+                spectra(msidata)[] = log2(spectra(msidata)[])
+
+            #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt':
+                print('squareroot transformation')
+
+                spectra(msidata)[] = sqrt(spectra(msidata)[])
+
+           #end if
+
+            ############################### QC ###########################
 
-        sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
-        count = 1
-        for (subsample in levels(msidata\$combined_sample)){
-        subsample_pixels = msidata[,msidata\$combined_sample == subsample]
-        subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE)
-        sample_matrix = cbind(sample_matrix, subsample_calc)
-        count = count+1
-        }
-        rownames(sample_matrix) = mz(msidata)
-        colnames(sample_matrix) = levels(msidata\$combined_sample)
-        write.table(sample_matrix, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
-    }else{
-        full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE))
-        rownames(full_sample_calc) = mz(msidata)
-        colnames(full_sample_calc) = "$infile.display_name"
-        write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
-    }
+                maxfeatures = length(features(msidata))
+                medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
+                medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+                TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1)
+                transformed= c(maxfeatures, medianpeaks, medint, TICs)
+                QC_numbers= cbind(QC_numbers, transformed)
+                vectorofactions = append(vectorofactions, "transformed")
+
+            #end if
+    #end for
+
+    ############# Outputs: summar matrix, RData, tabular and QC report #############
+    ################################################################################
+    ## optional summarized matrix
+        print('Summarized matrix')
 
-#end if
+    #if "mean" in str($summary_type).split(","):
+        print("mean matrix")
+        if (!is.null(levels(msidata\$combined_sample))){
 
-#if "median" in str($summary_type).split(","):
-    print("median matrix")
-    if (!is.null(levels(msidata\$combined_sample))){
-        sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
-        count = 1
-        for (subsample in levels(msidata\$combined_sample)){
-        subsample_pixels = msidata[,msidata\$combined_sample == subsample]
-        subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE)
-        sample_matrix = cbind(sample_matrix, subsample_calc)
-        count = count+1
+            sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
+            count = 1
+            for (subsample in levels(msidata\$combined_sample)){
+            subsample_pixels = msidata[,msidata\$combined_sample == subsample]
+            subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE)
+            sample_matrix = cbind(sample_matrix, subsample_calc)
+            count = count+1
+            }
+            rownames(sample_matrix) = mz(msidata)
+            colnames(sample_matrix) = levels(msidata\$combined_sample)
+            write.table(sample_matrix, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+        }else{
+            full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE))
+            rownames(full_sample_calc) = mz(msidata)
+            colnames(full_sample_calc) = "$infile.display_name"
+            write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
         }
 
-        rownames(sample_matrix) = mz(msidata)
-        colnames(sample_matrix) = levels(msidata\$combined_sample)
-        write.table(sample_matrix, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
-    }else{
-        full_sample_calc = apply(spectra(msidata)[],1,median, na.rm=TRUE)
-        rownames(full_sample_calc) = mz(msidata)
-        colnames(full_sample_calc) = "$infile.display_name"
-        write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
-    }
-#end if
+    #end if
 
-#if "sd" in str($summary_type).split(","):
-    print("sd matrix")
-    if (!is.null(levels(msidata\$combined_sample))){
-        sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
-        count = 1
-        for (subsample in levels(msidata\$combined_sample)){
-        subsample_pixels = msidata[,msidata\$combined_sample == subsample]
-        subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE)
-        sample_matrix = cbind(sample_matrix, subsample_calc)
-        count = count+1
+    #if "median" in str($summary_type).split(","):
+        print("median matrix")
+        if (!is.null(levels(msidata\$combined_sample))){
+            sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
+            count = 1
+            for (subsample in levels(msidata\$combined_sample)){
+            subsample_pixels = msidata[,msidata\$combined_sample == subsample]
+            subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE)
+            sample_matrix = cbind(sample_matrix, subsample_calc)
+            count = count+1
+            }
+
+            rownames(sample_matrix) = mz(msidata)
+            colnames(sample_matrix) = levels(msidata\$combined_sample)
+            write.table(sample_matrix, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+        }else{
+            full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,median, na.rm=TRUE))
+            rownames(full_sample_calc) = mz(msidata)
+            colnames(full_sample_calc) = "$infile.display_name"
+            write.table(full_sample_calc, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
         }
+    #end if
 
-        rownames(sample_matrix) = mz(msidata)
-        colnames(sample_matrix) = levels(msidata\$combined_sample)
-        write.table(sample_matrix, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
-    }else{
-        full_sample_calc = apply(spectra(msidata)[],1,sd, na.rm=TRUE)
-        rownames(full_sample_calc) = mz(msidata)
-        colnames(full_sample_calc) = "$infile.display_name"
-        write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
-    }
-#end if
+    #if "sd" in str($summary_type).split(","):
+        print("sd matrix")
+        if (!is.null(levels(msidata\$combined_sample))){
+            sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
+            count = 1
+            for (subsample in levels(msidata\$combined_sample)){
+            subsample_pixels = msidata[,msidata\$combined_sample == subsample]
+            subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE)
+            sample_matrix = cbind(sample_matrix, subsample_calc)
+            count = count+1
+            }
 
-## save as (.RData)
-save(msidata, file="$msidata_preprocessed")
-
-print(paste0("Number of NAs in intensity matrix: ", sum(is.na(spectra(msidata)[]))))
+            rownames(sample_matrix) = mz(msidata)
+            colnames(sample_matrix) = levels(msidata\$combined_sample)
+            write.table(sample_matrix, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+        }else{
 
-## save output matrix
-#if $output_matrix:
+            full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,sd, na.rm=TRUE))
+            rownames(full_sample_calc) = mz(msidata)
+            colnames(full_sample_calc) = "$infile.display_name"
+            write.table(full_sample_calc, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+        }
+    #end if
+    print(paste0("Number of NA in output file: ",sum(is.na(spectra(msidata)[]))))
+
+    ## save as (.RData)
+    save(msidata, file="$msidata_preprocessed")
 
-    if (length(features(msidata))> 0)
-        {
-        ## save as intensity matrix
-        spectramatrix = spectra(msidata)[]
-        rownames(spectramatrix) = mz(msidata)
-        newmatrix = rbind(pixels(msidata), spectramatrix)
-        write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+    ## save output matrix
+    #if $output_matrix:
 
+        if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){
+            spectramatrix = spectra(msidata)[]
+            spectramatrix = cbind(mz(msidata),spectramatrix)
+            newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix)
+            write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
         }else{
-            print("file has no features left")
-            write.table(matrix(rownames(coord(msidata)), ncol=ncol(msidata), nrow=1), file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
+            print("file has no features or pixels left")
         }
+    #end if
 
-#end if
-
-## save QC report
+    ## save QC report
 
     pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12)
     plot(0,type='n',axes=FALSE,ann=FALSE)
@@ -413,10 +416,14 @@
     grid.table(t(QC_numbers))
     dev.off()
 
+}else{
+    print("inputfile has no intensities > 0")
+}
+
     ]]></configfile>
     </configfiles>
     <inputs>
-        <param name="infile" type="data" format="imzml,rdata,danalyze75"
+        <param name="infile" type="data" format="imzml,rdata,analyze75"
             label="MSI data as imzml, analyze7.5 or Cardinal MSImageSet saved as RData"
             help="load imzml and ibd file by uploading composite datatype imzml"/>
         <conditional name="processed_cond">
@@ -610,7 +617,7 @@
     </inputs>
     <outputs>
         <data format="rdata" name="msidata_preprocessed" label="$infile.display_name preprocessed"/>
-        <data format="pdf" name="QC_plots" from_work_dir="Preprocessing.pdf" label = "$infile.display_name preprocessed_QC"/>
+        <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "$infile.display_name preprocessed_QC"/>
         <data format="tabular" name="summarized_output_mean" label="$infile.display_name mean_matrix">
             <filter>summary_type and "mean" in summary_type</filter>
         </data>
@@ -678,7 +685,7 @@
             <param name="output_matrix" value="True"/>
             <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/>
             <output name="matrixasoutput" file="preprocessing_results1.txt"/>
-            <output name="QC_plots" file="preprocessing_results1.pdf" compare="sim_size"/>
+            <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/>
         </test>
         <test expect_num_outputs="4">
             <param name="infile" value="123_combined.RData" ftype="rdata"/>
@@ -705,7 +712,7 @@
             <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/>
             <output name="summarized_output_median" file="preprocessing_median2.txt" lines_diff="2"/>
             <output name="summarized_output_sd" file="preprocessing_sd2.txt" lines_diff="2"/>
-            <output name="QC_plots" file="preprocessing_results2.pdf" compare="sim_size"/>
+            <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/>
         </test>
         <test expect_num_outputs="3">
             <param name="infile" value="" ftype="analyze75">
@@ -736,7 +743,7 @@
             </repeat>
             <param name="summary_type" value="mean"/>
             <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/>
-            <output name="QC_plots" file="preprocessing_results3.pdf" compare="sim_size"/>
+            <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/>
             <output name="summarized_output_mean" file="preprocessing_mean3.txt" lines_diff="2"/>
         </test>
         <test expect_num_outputs="3">
@@ -759,7 +766,7 @@
             <param name="output_matrix" value="True"/>
             <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/>
             <output name="matrixasoutput" file="preprocessing_results4.txt"/>
-            <output name="QC_plots" file="preprocessing_results4.pdf" compare="sim_size"/>
+            <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/>
         </test>
         <test expect_num_outputs="2">
             <param name="infile" value="" ftype="imzml">
@@ -776,7 +783,7 @@
                 </conditional>
             </repeat>
              <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size"/>
-            <output name="QC_plots" file="preprocessing_results5.pdf" compare="sim_size"/>
+            <output name="QC_overview" file="preprocessing_results5.pdf" compare="sim_size"/>
         </test>
     </tests>
     <help>
Binary file test-data/preprocessing_results1.RData has changed
Binary file test-data/preprocessing_results1.pdf has changed
--- a/test-data/preprocessing_results1.txt	Thu Jun 21 16:45:55 2018 -0400
+++ b/test-data/preprocessing_results1.txt	Fri Jul 06 14:13:48 2018 -0400
@@ -1,3 +1,3 @@
-	x = 1, y = 1	x = 2, y = 1	x = 3, y = 1	x = 1, y = 2	x = 2, y = 2	x = 3, y = 2	x = 1, y = 3	x = 2, y = 3	x = 3, y = 3
+mz | spectra	x = 1, y = 1	x = 2, y = 1	x = 3, y = 1	x = 1, y = 2	x = 2, y = 2	x = 3, y = 2	x = 1, y = 3	x = 2, y = 3	x = 3, y = 3
 329	8.48069807321137	6.00276368862812	0	0	7.22240715797167	6.68463797360356	0	0	0
 345	0	0	4.70593890744759	0	0	0	5.23000350586712	4.17949067812964	5.08555910047608
Binary file test-data/preprocessing_results2.pdf has changed
Binary file test-data/preprocessing_results3.RData has changed
Binary file test-data/preprocessing_results3.pdf has changed
Binary file test-data/preprocessing_results4.RData has changed
Binary file test-data/preprocessing_results4.pdf has changed
--- a/test-data/preprocessing_results4.txt	Thu Jun 21 16:45:55 2018 -0400
+++ b/test-data/preprocessing_results4.txt	Fri Jul 06 14:13:48 2018 -0400
@@ -1,4 +1,4 @@
-	x = 1, y = 1	x = 2, y = 1	x = 3, y = 1	x = 1, y = 2	x = 2, y = 2	x = 3, y = 2	x = 1, y = 3	x = 2, y = 3	x = 3, y = 3
+mz | spectra	x = 1, y = 1	x = 2, y = 1	x = 3, y = 1	x = 1, y = 2	x = 2, y = 2	x = 3, y = 2	x = 1, y = 3	x = 2, y = 3	x = 3, y = 3
 1199	1.90173968313755	1.13259535967648	2.08382650993109	2.34349737625869	1.33087314662273	2.14468085106383	3.43161925601751	1.32706902782797	2.22480967308554
 1200	1.39388874502695	0.970046951574763	1.52152411836238	1.35619061126081	1.10906095551895	1.66382978723404	2.22846006564551	1.19804842790025	1.7089117778773
 1201	1.13095882671438	0.99102093971692	1.23623834616944	1.19344773790952	1.05864909390445	1.31063829787234	1.67396061269147	1.07824358511023	1.28168383340797
@@ -157,4 +157,4 @@
 1354	1.06362120618167	1.11618093417392	1.04838476586595	1.07551811953901	1.02051142468305	1.08344125809436	1.09535248786985	1.11069907763863	1.07245078759322
 1355	1.17876426640757	1.01819178799199	1.08701772487747	1.1007336161215	1.0558993559982	1.09555125725338	1.09948776606326	1.05059631369714	1.03765826649839
 1356	1.25341933661339	1.11461765555463	1.12696739822804	1.128350588569	1.02840197693575	1.09422492401216	1.08807439824945	1.10589085652331	0.953489859893801
-1357	NA	NA	NA	NA	NA	NA	NA	NA	NA
+1357	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
Binary file test-data/preprocessing_results5.RData has changed
Binary file test-data/preprocessing_results5.pdf has changed