diff preprocessing.xml @ 7:44a4b31fcbf3 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit f986c51abe33c7f622d429a3c4a79ee24b33c1f3"
author galaxyp
date Thu, 23 Apr 2020 08:07:20 -0400
parents 5bf056c0354e
children 87bb011a4ee8
line wrap: on
line diff
--- a/preprocessing.xml	Wed Mar 25 08:13:17 2020 -0400
+++ b/preprocessing.xml	Thu Apr 23 08:07:20 2020 -0400
@@ -1,17 +1,14 @@
-<tool id="cardinal_preprocessing" name="MSI preprocessing" version="2.4.0.0">
+<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.1">
     <description>
         mass spectrometry imaging preprocessing
     </description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <requirements>
-        <requirement type="package" version="2.4.0">bioconductor-cardinal</requirement>
-        <requirement type="package" version="3.6.1">r-base</requirement>
+    <expand macro="requirements">
         <requirement type="package" version="2.3">r-gridextra</requirement>
         <requirement type="package" version="3.2.1">r-ggplot2</requirement>
-        <requirement type="package" version="0.20_38">r-lattice</requirement>
-    </requirements>
+    </expand>
     <command detect_errors="exit_code">
     <![CDATA[
 
@@ -32,33 +29,21 @@
 
 ################################# load libraries and read file #################
 
+## set CPU: use GALAXY_SLOTS when Galaxy provides it, otherwise fall back to 1
+
+if (Sys.getenv("GALAXY_SLOTS")!="")
+    {
+        number_cpu = as.numeric(Sys.getenv("GALAXY_SLOTS")) ##cpu set by Galaxy
+    }else{
+        number_cpu = 1 ## default = 1
+    }
+
 library(Cardinal)
 library(gridExtra)
-library(lattice)
 library(ggplot2)
 
 
-        ## function to read RData files independent of filename
-        loadRData <- function(fileName){
-        load(fileName)
-        get(ls()[ls() != "fileName"])
-        }
-
-        #if $infile.ext == 'imzml'
-            #if str($processed_cond.processed_file) == "processed":
-                msidata <- readImzML('infile', resolution=$processed_cond.accuracy, units = "$processed_cond.units")
-                centroided(msidata) = $centroids
-            #else
-                msidata <- readImzML('infile')
-                centroided(msidata) = $centroids
-            #end if
-        #elif $infile.ext == 'analyze75'
-            msidata = readAnalyze('infile')
-            centroided(msidata) = $centroids
-        #else
-            msidata = loadRData('infile.RData')
-            msidata = as(msidata, "MSImagingExperiment")
-        #end if
+@READING_MSIDATA_FULLY_COMPATIBLE@
 
 
 ## remove duplicated coordinates, otherwise peak picking and log2 transformation will fail
@@ -108,7 +93,7 @@
             }
 
             msidata = normalize(msidata, method="tic")
-            msidata <- process(msidata, BPPARAM=MulticoreParam())
+            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
 
 
             ############################### QC ###########################
@@ -134,7 +119,7 @@
             }
 
             msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline, spar=$method.methods_conditional.spar_baseline)
-            msidata <- process(msidata, BPPARAM=MulticoreParam())
+            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
 
             ############################### QC ###########################
 
@@ -180,7 +165,7 @@
                 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter)
 
             #end if
-            msidata <- process(msidata, BPPARAM=MulticoreParam())
+            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
 
             ############################### QC ###########################
 
@@ -217,7 +202,7 @@
                 msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method)
 
             #end if
-            msidata <- process(msidata, BPPARAM=MulticoreParam())
+            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
 
             #if str($method.methods_conditional.imzml_output) == "cont_format":
                 #set $continuous_format = True
@@ -262,7 +247,7 @@
 
             #end if
 
-            msidata <- process(msidata, BPPARAM=MulticoreParam())
+            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
 
             #if str($method.methods_conditional.imzml_output) == "cont_format":
                 #set $continuous_format = True
@@ -286,7 +271,7 @@
             print('Peak_filtering')
 
             msidata = peakFilter(msidata, freq.min = $method.methods_conditional.frequ_filtering)
-            msidata <- process(msidata, BPPARAM=MulticoreParam())
+            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
 
             ############################### QC ###########################
 
@@ -300,6 +285,32 @@
             print(plot(msidata, pixel=random_spectra))
             title("Spectra after filtering", outer=TRUE, line=0)
 
+    ############################### Peak binning ###########################
+
+        #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_binning':
+            print('Peak_binning')
+
+            ## read the reference m/z values from the selected column of the tabular file; only values strictly inside the m/z range of the dataset are kept
+            reference_table = read.delim("$method.methods_conditional.mz_tabular", header = $method.methods_conditional.feature_header, stringsAsFactors = FALSE)
+            reference_column = reference_table[,$method.methods_conditional.feature_column]
+            peak_reference = reference_column[reference_column>min(mz(msidata)) & reference_column<max(mz(msidata))]
+
+            msidata = peakBin(msidata, ref = peak_reference, tolerance = $method.methods_conditional.peakbin_tol, units = "$method.methods_conditional.peakbin_units", type="$method.methods_conditional.peaks_type")
+            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
+
+            ############################### QC ###########################
+            ## append m/z range, feature count and pixel count after binning as a new QC_numbers column, then plot the spectra selected by random_spectra
+            maxfeatures =nrow(msidata)
+            pixelcount = ncol(msidata)
+            minmz = round(min(mz(msidata)), digits=2)
+            maxmz = round(max(mz(msidata)), digits=2)
+            peak_binned = c(minmz, maxmz,maxfeatures, pixelcount)
+            QC_numbers= cbind(QC_numbers, peak_binned)
+            vectorofactions = append(vectorofactions, "peak binned")
+            print(plot(msidata, pixel=random_spectra))
+            title("Spectra after peak binning", outer=TRUE, line=0)
+
+
     ############################### Data reduction ###########################
 
         #elif str( $method.methods_conditional.preprocessing_method) == 'Data_reduction':
@@ -324,23 +335,6 @@
                 print('resample reduction')
 
                 msidata = reduceDimension(msidata, method="resample", step=$method.methods_conditional.methods_for_reduction.resample_step)
-
-            #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'peaks':
-                print('peaks reduction')
-
-                #if str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'table':
-
-                    reference_table = read.delim("$method.methods_conditional.methods_for_reduction.ref_type.mz_tabular", header = $method.methods_conditional.methods_for_reduction.ref_type.feature_header, stringsAsFactors = FALSE)
-                    reference_column = reference_table[,$method.methods_conditional.methods_for_reduction.ref_type.feature_column]
-                    peak_reference = reference_column[reference_column>min(mz(msidata)) & reference_column<max(mz(msidata))]
-
-                #elif str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'msidata_ref':
-
-                        peak_reference = loadRData('$method.methods_conditional.methods_for_reduction.ref_type.peaks_msidata')
-
-                #end if
-
-                msidata = reduceDimension(msidata, method="peaks", ref=peak_reference, type="$method.methods_conditional.methods_for_reduction.peaks_type")
             #end if
 
             ## coercition into new format
@@ -447,6 +441,7 @@
                     <option value="Peak_picking">Peak picking</option>
                     <option value="Peak_alignment">Peak alignment</option>
                     <option value="Peak_filtering">Peak filtering</option>
+                    <option value="Peak_binning">Peak binning to reference peaks</option>
                     <option value="Data_reduction">Data reduction</option>
                     <option value="Transformation">Transformation</option>
                 </param>
@@ -464,7 +459,7 @@
                     <param name="blocks_baseline" type="integer" value="500"
                         label="Blocks"/>
                     <param name="spar_baseline" type="float" value="1.0" label="Spar value" 
-                           help = "Smoothing parameter for the spline smoothing 
+                           help="Smoothing parameter for the spline smoothing 
                                   applied to the spectrum in order to decide the cutoffs 
                               for throwing away false noise spikes that might occur inside peaks"/>
                 </when>
@@ -515,7 +510,7 @@
                         <when value="mad"/>
                         <when value="simple"/>
                     </conditional>
-                <param name="imzml_output" type="boolean" label="imzML output in processed format" truevalue="proc_format" falsevalue="cont_format" help= "Processed imzML works only in MALDIquant tools, not yet in MSI tools (Cardinal)"/>
+                <param name="imzml_output" type="boolean" label="imzML output in processed format" checked="True" truevalue="proc_format" falsevalue="cont_format"/>
                 </when>
                 <when value="Peak_alignment">
                     <param name="value_diffalignment" type="float" value="200"
@@ -534,17 +529,35 @@
                             <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to use for alignment. Only the m/z values from the tabular file will be kept."/>
                         </when>
                     </conditional>
-                <param name="imzml_output" type="boolean" label="imzML output in processed format" truevalue="proc_format" falsevalue="cont_format" help= "Processed imzML works only in MALDIquant tools, not yet in MSI tools (Cardinal)"/>
+                <param name="imzml_output" type="boolean" label="imzML output in processed format" checked="True" truevalue="proc_format" falsevalue="cont_format" help= "Processed imzML works only in MALDIquant tools, not yet in MSI tools (Cardinal)"/>
                 </when>
                 <when value="Peak_filtering">
                     <param name="frequ_filtering" type="float" value="0.01" max="1" min="0" label="Minimum frequency" help="Peaks that occur in the dataset in lesser proportion than this will be dropped (0.01 --> filtering for 1% of spectra)"/>
                 </when>
+                <when value="Peak_binning">
+                    <expand macro="reading_1_column_mz_tabular" label="A reference to which the peaks are binned." help="Tabular file with m/z features to extract from input file"/>
+                    <param name="peakbin_tol" value="NA" type="text" label="The tolerance to be used when matching the m/z features in the dataset to the reference. If this is NA, then automatically guess a resolution from the data." >
+                        <sanitizer invalid_char=""> <!-- allow digits, the decimal point and the letters of NA; every other character is dropped -->
+                            <valid initial="string.digits">
+                                <add value="NA" /><add value="." />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param name="peakbin_units" display="radio" type="select" label="The units to use for the tolerance.">
+                        <option value="mz" >mz</option>
+                        <option value="ppm" selected="True" >ppm</option>
+                    </param>
+                    <param name="peaks_type" type="select" display="radio"
+                           label="Should the peak height or area under the curve be taken as the intensity value?">
+                            <option value="height" selected="True">height</option>
+                            <option value="area">area</option>
+                    </param>
+                </when>
                 <when value="Data_reduction">
                     <conditional name="methods_for_reduction">
                         <param name="reduction_method" type="select" label="Reduction method">
                             <option value="bin" selected="True">bin</option>
                             <option value="resample">resample</option>
-                            <option value="peaks">peaks</option>
                         </param>
                         <when value="bin">
                             <param name="bin_width" type="float" value="1"
@@ -565,25 +578,6 @@
                             <param name="resample_step" type="float" value="1"
                                    label="The step size in m/z" help="Step size must be greater than range of m/z values divided by number of m/z features"/>
                         </when>
-                        <when value="peaks">
-                            <param name="peaks_type" type="select" display="radio"
-                                   label="Should the peak height or area under the curve be taken as the intensity value?">
-                                    <option value="height" selected="True">height</option>
-                                    <option value="area">area</option>
-                            </param>                            
-                            <conditional name="ref_type">
-                                <param name="reference_datatype" type="select" label="Choose reference datatype">
-                                    <option value="table" selected="True">tabular file</option>
-                                    <option value="msidata_ref">msidata file</option>
-                                </param>
-                                <when value="table">
-                                    <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to extract from input file"/>
-                                </when>
-                                <when value="msidata_ref">
-                                    <param name="peaks_msidata" type="data" format="rdata" label="Picked and aligned Cardinal MSImageSet saved as RData"/>
-                                </when>
-                            </conditional>
-                        </when>
                     </conditional>
                 </when>
                 <when value="Transformation">
@@ -635,11 +629,13 @@
                     <param name="blocks_picking" value="3"/>
                     <param name="window_picking" value="3"/>
                     <param name="SNR_picking_method" value="3"/>
+                    <param name="imzml_output" value="cont_format"/>
                 </conditional>
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
                     <param name="preprocessing_method" value="Peak_alignment"/>
+                    <param name="imzml_output" value="cont_format"/>
                 </conditional>
             </repeat>
             <repeat name="methods">
@@ -673,11 +669,13 @@
                     <conditional name="methods_for_picking">
                         <param name="picking_method" value="adaptive"/>
                     </conditional>
+                    <param name="imzml_output" value="cont_format"/>
                 </conditional>
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
                     <param name="preprocessing_method" value="Peak_alignment"/>
+                    <param name="imzml_output" value="cont_format"/>
                 </conditional>
             </repeat>
             <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/>
@@ -699,27 +697,19 @@
             <repeat name="methods">
                 <conditional name="methods_conditional">
                     <param name="preprocessing_method" value="Peak_picking"/>
-                    <param name="blocks_picking" value="100"/>
+                    <param name="blocks_picking" value="20"/>
                     <param name="window_picking" value="5"/>
-                    <param name="SNR_picking_method" value="3"/>
+                    <param name="SNR_picking_method" value="2"/>
                     <conditional name="methods_for_picking">
                         <param name="picking_method" value="mad"/>
                     </conditional>
+                <param name="imzml_output" value="proc_format"/>
                 </conditional>
-                <param name="imzml_output" value="proc_format"/>
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
                     <param name="preprocessing_method" value="Peak_alignment"/>
-                </conditional>
-            <param name="imzml_output" value="proc_format"/>
-            </repeat>
-            <repeat name="methods">
-                <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Transformation"/>
-                        <conditional name="transf_conditional">
-                            <param name="trans_type" value="log2"/>
-                        </conditional>
+                    <param name="imzml_output" value="proc_format"/>
                 </conditional>
             </repeat>
             <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/>
@@ -762,7 +752,7 @@
                 <conditional name="methods_conditional">
                     <param name="preprocessing_method" value="Transformation"/>
                         <conditional name="transf_conditional">
-                            <param name="trans_type" value="sqrt"/>
+                            <param name="trans_type" value="log2"/>
                         </conditional>
                 </conditional>
             </repeat>
@@ -799,12 +789,13 @@
 - Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards)
 - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value; if no reference is given the peaks are aligned to the local maxima of the mean spectrum of the current dataset; external reference data can be used from another MSI data file or a tabular file with m/z values, but then only the m/z from the reference will be kept
 - Peak filtering: removes peaks that occur only in a small proportion of pixels. If not sure which cut off to choose run quality control tool first and decide according to the number of peaks per m/z plot
-- Data reduction: binning, resampling or peak filtering to reduce data
+- Peak binning: extracts peak intensities (from a profile dataset) for a list of m/z (reference) values
+- Data reduction: binning or resampling to reduce data
 - Transformation: log2 or squareroot transformation of all intensities; when using log2 transformation zero intensities will become NA, this can lead to compatibility problems. 
 
 **Output**
 
-- MSI data as continuous imzML file (option to output processed imzML file only after peak picking/peak alignment; but is not yet compatible with other MSI tools) 
+- MSI data as continuous or processed imzML file
 - pdf with key values and four random mass spectra after each processing step
 
         ]]>