Mercurial > repos > galaxyp > msi_preprocessing

--- a/msi_preprocessing.xml	Wed Aug 22 13:43:04 2018 -0400
+++ b/msi_preprocessing.xml	Tue Sep 04 13:42:22 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.6">
+<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.7">
     <description>
         mass spectrometry imaging preprocessing
     </description>
@@ -103,7 +103,7 @@
             print('Baseline_reduction')
             ##baseline reduction

-            msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline)
+            msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline, spar=$method.methods_conditional.spar_baseline)

             ############################### QC ###########################

@@ -132,7 +132,7 @@

                 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters)
             #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma':
-                print('sgolay smoothing')
+                print('moving average smoothing')

                 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter)

@@ -156,7 +156,7 @@
             ## Peakpicking

             ## remove duplicated coordinates, otherwise peak picking will fail
-            print(paste0(sum(duplicated(coord(msidata))), " coordinates were removed"))
+            print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed"))
             msidata <- msidata[,!duplicated(coord(msidata))]

             #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive':
@@ -364,14 +364,15 @@
             sample_matrix = cbind(sample_matrix, subsample_calc)
             count = count+1
             }
-            rownames(sample_matrix) = mz(msidata)
-            colnames(sample_matrix) = levels(msidata\$annotation)
-            write.table(sample_matrix, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+
+            sample_matrix_mean = cbind(mz(msidata),sample_matrix)
+            sample_matrix_mean = rbind(c("mz", levels(msidata\$annotation)), sample_matrix_mean)
+            write.table(sample_matrix_mean, file="$summarized_mean", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
         }else{
-            full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE))
-            rownames(full_sample_calc) = mz(msidata)
-            colnames(full_sample_calc) = "$infile.display_name"
-            write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+            full_sample_calc_mean = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE))
+            full_sample_calc_mean = cbind(mz(msidata),full_sample_calc_mean)
+            full_sample_calc_mean = rbind(c("mz", "$infile.display_name"), full_sample_calc_mean)
+            write.table(full_sample_calc_mean, file="$summarized_mean", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
         }

     #end if
@@ -391,14 +392,15 @@
             count = count+1
             }

-            rownames(sample_matrix) = mz(msidata)
-            colnames(sample_matrix) = levels(msidata\$annotation)
-            write.table(sample_matrix, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+            sample_matrix_median = cbind(mz(msidata),sample_matrix)
+            sample_matrix_median = rbind(c("mz", levels(msidata\$annotation)), sample_matrix_median)
+            write.table(sample_matrix_median, file="$summarized_median", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
         }else{
-            full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,median, na.rm=TRUE))
-            rownames(full_sample_calc) = mz(msidata)
-            colnames(full_sample_calc) = "$infile.display_name"
-            write.table(full_sample_calc, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+
+            full_sample_calc_median = as.data.frame(apply(spectra(msidata)[],1,median, na.rm=TRUE))
+            full_sample_calc_median = cbind(mz(msidata),full_sample_calc_median)
+            full_sample_calc_median = rbind(c("mz", "$infile.display_name"), full_sample_calc_median)
+            write.table(full_sample_calc_median, file="$summarized_median", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
         }
     #end if

@@ -414,15 +416,15 @@
             count = count+1
             }

-            rownames(sample_matrix) = mz(msidata)
-            colnames(sample_matrix) = levels(msidata\$annotation)
-            write.table(sample_matrix, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+            sample_matrix_sd = cbind(mz(msidata),sample_matrix)
+            sample_matrix_sd = rbind(c("mz", levels(msidata\$annotation)), sample_matrix_sd)
+            write.table(sample_matrix_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
         }else{

-            full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,sd, na.rm=TRUE))
-            rownames(full_sample_calc) = mz(msidata)
-            colnames(full_sample_calc) = "$infile.display_name"
-            write.table(full_sample_calc, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
+            full_sample_calc_sd = as.data.frame(apply(spectra(msidata)[],1,sd, na.rm=TRUE))
+            full_sample_calc_sd = cbind(mz(msidata),full_sample_calc_sd)
+            full_sample_calc_sd = rbind(c("mz", "$infile.display_name"), full_sample_calc_sd)
+            write.table(full_sample_calc_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
         }
     #end if
     print(paste0("Number of NA in output file: ",sum(is.na(spectra(msidata)[]))))
@@ -436,8 +438,8 @@
         if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){
             spectramatrix = spectra(msidata)[]
             spectramatrix = cbind(mz(msidata),spectramatrix)
-            newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix)
-            write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
+            newmatrix = rbind(c("mz", names(pixels(msidata))), spectramatrix)
+            write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
         }else{
             print("file has no features or pixels left")
         }
@@ -517,7 +519,7 @@
         </conditional>
         <repeat name="methods" title="Preprocessing" min="1" max="50">
             <conditional name="methods_conditional">
-                <param name="preprocessing_method" type="select" label="Select the preprocessing methods you want to apply">
+                <param name="preprocessing_method" type="select" label="Preprocessing methods">
                     <option value="Normalization" selected="True">Normalization to TIC</option>
                     <option value="Baseline_reduction">Baseline Reduction</option>
                     <option value="Smoothing">Peak smoothing</option>
@@ -529,8 +531,12 @@
                 </param>
                 <when value="Normalization"/>
                 <when value="Baseline_reduction">
-                    <param name="blocks_baseline" type="integer" value="50"
+                    <param name="blocks_baseline" type="integer" value="500"
                         label="Blocks"/>
+                    <param name="spar_baseline" type="float" value="1.0" label="Spar value"
+                           help = "Smoothing parameter for the spline smoothing
+                                  applied to the spectrum in order to decide the cutoffs
+                              for throwing away false noise spikes that might occur inside peaks"/>
                 </when>
                 <when value="Smoothing">
                     <conditional name="methods_for_smoothing">
@@ -540,28 +546,28 @@
                             <option value="ma">moving average</option>
                         </param>
                         <when value="gaussian">
-                            <param name="sd_gaussian" type="float" value="4"
+                            <param name="sd_gaussian" type="float" value="2"
                                    label="The standard deviation for the Gaussian kernel (window/sd)"/>
                         </when>
                         <when value="sgolay">
                             <param name="order_of_filters" type="integer" value="3"
-                                   label="The order of the smoothing filter"/>
+                                   label="The order of the smoothing filter, must be smaller than window size"/>
                         </when>
                         <when value="ma">
-                            <param name="coefficients_ma_filter" type="integer" value="1"
+                            <param name="coefficients_ma_filter" type="float" value="1"
                                    label="The coefficients for the moving average filter"/>
                         </when>
                     </conditional>
-                    <param name="window_smoothing" type="integer" value="9"
+                    <param name="window_smoothing" type="float" value="8"
                                 label="Window size"/>
                 </when>
                 <when value="Peak_picking">
-                    <param name="SNR_picking_method" type="integer" value="3"
+                    <param name="SNR_picking_method" type="integer" value="6"
                         label="Signal to noise ratio"
                         help="The minimal signal to noise ratio for peaks to be considered as a valid peak."/>
                     <param name="blocks_picking" type="integer" value="100" label = "Number of blocks"
                         help="Number of blocks in which to divide mass spectrum to calculate noise"/>
-                    <param name="window_picking" type="integer" value="5" label= "Window size" help="Window width for seeking local maxima"/>
+                    <param name="window_picking" type="float" value="5" label= "Window size" help="Window width for seeking local maxima"/>
                     <conditional name="methods_for_picking">
                         <param name="picking_method" type="select" label="Peak picking method" help="only simple works for processed imzML files">
                             <option value="adaptive" selected="True">adaptive</option>
@@ -589,7 +595,7 @@
                             <option value="DP">DP</option>
                         </param>
                         <when value="diff">
-                            <param name="value_diffalignment" type="integer" value="200"
+                            <param name="value_diffalignment" type="float" value="200"
                                    label="diff.max" help="Peaks that differ less than this value will be aligned together"/>
                             <param name="units_diffalignment" type="select" display = "radio" optional = "False"
                                    label="units">
@@ -598,7 +604,7 @@
                             </param>
                         </when>
                         <when value="DP">
-                            <param name="gap_DPalignment" type="integer" value="0"
+                            <param name="gap_DPalignment" type="float" value="0"
                                    label="Gap" help="The gap penalty for the dynamic programming sequence alignment"/>
                         </when>
                     </conditional>
@@ -673,7 +679,7 @@
                 </when>
                 <when value="Transformation">
                     <conditional name="transf_conditional">
-                        <param name="trans_type" type="select" label="Choose which intensity transformation you want to apply" help="logarithm base 2 (log2) or squareroot (sqrt)">
+                        <param name="trans_type" type="select" label="Intensity transformations" help="logarithm base 2 (log2) or squareroot (sqrt)">
                             <option value="log2" selected="True">log2</option>
                             <option value="sqrt">sqrt</option>
                         </param>
@@ -708,16 +714,16 @@
     <outputs>
         <data format="rdata" name="msidata_preprocessed" label="$infile.display_name preprocessed"/>
         <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "$infile.display_name preprocessed_QC"/>
-        <data format="tabular" name="summarized_output_mean" label="$infile.display_name mean_matrix">
+        <data format="tabular" name="summarized_mean" label="$infile.display_name preprocessed_mean">
             <filter>summary_type and "mean" in summary_type</filter>
         </data>
-        <data format="tabular" name="summarized_output_median" label="$infile.display_name median_matrix">
+        <data format="tabular" name="summarized_median" label="$infile.display_name preprocessed_median">
             <filter>summary_type and "median" in summary_type</filter>
         </data>
-        <data format="tabular" name="summarized_output_sd" label="$infile.display_name sd_matrix">
+        <data format="tabular" name="summarized_sd" label="$infile.display_name preprocessed_sd">
             <filter>summary_type and "sd" in summary_type</filter>
         </data>
-        <data format="tabular" name="matrixasoutput" label="$infile.display_name preprocessed_matrix">
+        <data format="tabular" name="intensity_matrix" label="$infile.display_name preprocessed_matrix">
             <filter>output_matrix</filter>
         </data>
     </outputs>
@@ -737,7 +743,9 @@
                     <param name="preprocessing_method" value="Smoothing"/>
                     <conditional name="methods_for_smoothing">
                         <param name="smoothing_method" value="gaussian"/>
+                        <param name="sd_gaussian" value="4"/>
                     </conditional>
+                        <param name="window_smoothing" value="9"/>
                 </conditional>
             </repeat>
             <repeat name="methods">
@@ -748,6 +756,7 @@
                     </conditional>
                     <param name="blocks_picking" value="3"/>
                     <param name="window_picking" value="3"/>
+                    <param name="SNR_picking_method" value="3"/>
                 </conditional>
             </repeat>
             <repeat name="methods">
@@ -777,7 +786,7 @@
             </conditional>
             <param name="output_matrix" value="True"/>
             <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/>
-            <output name="matrixasoutput" file="preprocessing_results1.txt"/>
+            <output name="intensity_matrix" file="preprocessing_results1.txt"/>
             <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/>
         </test>
         <test expect_num_outputs="4">
@@ -811,8 +820,8 @@
             </conditional>
             <param name="summary_type" value="median,sd"/>
             <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/>
-            <output name="summarized_output_median" file="preprocessing_median2.txt" lines_diff="2"/>
-            <output name="summarized_output_sd" file="preprocessing_sd2.txt" lines_diff="2"/>
+            <output name="summarized_median" file="preprocessing_median2.txt" lines_diff="2"/>
+            <output name="summarized_sd" file="preprocessing_sd2.txt" lines_diff="2"/>
             <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/>
         </test>
         <test expect_num_outputs="3">
@@ -831,6 +840,7 @@
                     <param name="preprocessing_method" value="Peak_picking"/>
                     <param name="blocks_picking" value="100"/>
                     <param name="window_picking" value="5"/>
+                    <param name="SNR_picking_method" value="3"/>
                         <param name="picking_method" value="limpic"/>
                 </conditional>
             </repeat>
@@ -848,7 +858,7 @@
             </conditional>
             <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/>
             <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/>
-            <output name="summarized_output_mean" file="preprocessing_mean3.txt" lines_diff="2"/>
+            <output name="summarized_mean" file="preprocessing_mean3.txt" lines_diff="2"/>
         </test>
         <test expect_num_outputs="3">
             <param name="infile" value="" ftype="analyze75">
@@ -872,7 +882,7 @@
             </conditional>
             <param name="output_matrix" value="True"/>
             <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/>
-            <output name="matrixasoutput" file="preprocessing_results4.txt"/>
+            <output name="intensity_matrix" file="preprocessing_results4.txt"/>
             <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/>
         </test>
         <test expect_num_outputs="2">
--- a/test-data/preprocessing_mean3.txt	Wed Aug 22 13:43:04 2018 -0400
+++ b/test-data/preprocessing_mean3.txt	Tue Sep 04 13:42:22 2018 -0400
@@ -1,4 +1,4 @@
-	Uploaded Composite Dataset (analyze75)
+mz	Uploaded Composite Dataset (analyze75)
 1199.55615234375	3.35218415321305
 1200.09387207031	1.08671297501661
 1200.59020996094	2.15851707603805
--- a/test-data/preprocessing_median2.txt	Wed Aug 22 13:43:04 2018 -0400
+++ b/test-data/preprocessing_median2.txt	Tue Sep 04 13:42:22 2018 -0400
@@ -1,4 +1,4 @@
-	File1	File2	NA
+mz	File1	File2	NA
 101.083335876465	0	0.133186891674995	0.266373783349991
 101.666664123535	0	0	0
 102.166664123535	0	0	0
Binary file test-data/preprocessing_results1.RData has changed
Binary file test-data/preprocessing_results1.pdf has changed
--- a/test-data/preprocessing_results1.txt	Wed Aug 22 13:43:04 2018 -0400
+++ b/test-data/preprocessing_results1.txt	Tue Sep 04 13:42:22 2018 -0400
@@ -1,3 +1,3 @@
-mz | spectra	x = 1, y = 1	x = 2, y = 1	x = 3, y = 1	x = 1, y = 2	x = 2, y = 2	x = 3, y = 2	x = 1, y = 3	x = 2, y = 3	x = 3, y = 3
+mz	x = 1, y = 1	x = 2, y = 1	x = 3, y = 1	x = 1, y = 2	x = 2, y = 2	x = 3, y = 2	x = 1, y = 3	x = 2, y = 3	x = 3, y = 3
 329	8.48069807321137	6.00276368862812	0	0	7.22240715797167	6.68463797360356	0	0	0
 345	0	0	4.70593890744759	0	0	0	5.23000350586712	4.17949067812964	5.08555910047608
Binary file test-data/preprocessing_results2.pdf has changed
Binary file test-data/preprocessing_results3.RData has changed
Binary file test-data/preprocessing_results3.pdf has changed
Binary file test-data/preprocessing_results4.RData has changed
Binary file test-data/preprocessing_results4.pdf has changed
--- a/test-data/preprocessing_results4.txt	Wed Aug 22 13:43:04 2018 -0400
+++ b/test-data/preprocessing_results4.txt	Tue Sep 04 13:42:22 2018 -0400
@@ -1,4 +1,4 @@
-mz | spectra	x = 1, y = 1	x = 2, y = 1	x = 3, y = 1	x = 1, y = 2	x = 2, y = 2	x = 3, y = 2	x = 1, y = 3	x = 2, y = 3	x = 3, y = 3
+mz	x = 1, y = 1	x = 2, y = 1	x = 3, y = 1	x = 1, y = 2	x = 2, y = 2	x = 3, y = 2	x = 1, y = 3	x = 2, y = 3	x = 3, y = 3
 1199	1.90173968313755	1.13259535967648	2.08382650993109	2.34349737625869	1.33087314662273	2.14468085106383	3.43161925601751	1.32706902782797	2.22480967308554
 1200	1.39388874502695	0.970046951574763	1.52152411836238	1.35619061126081	1.10906095551895	1.66382978723404	2.22846006564551	1.19804842790025	1.7089117778773
 1201	1.13095882671438	0.99102093971692	1.23623834616944	1.19344773790952	1.05864909390445	1.31063829787234	1.67396061269147	1.07824358511023	1.28168383340797
Binary file test-data/preprocessing_results5.RData has changed
Binary file test-data/preprocessing_results5.pdf has changed
--- a/test-data/preprocessing_sd2.txt	Wed Aug 22 13:43:04 2018 -0400
+++ b/test-data/preprocessing_sd2.txt	Tue Sep 04 13:42:22 2018 -0400
@@ -1,4 +1,4 @@
-	File1	File2	NA
+mz	File1	File2	NA
 101.083335876465	0.180910895583245	0.284914371691127	0.358878736172051
 101.666664123535	0	0	0
 102.166664123535	0	0	0