Mercurial > repos > galaxyp > msi_preprocessing
changeset 7:1a3d477bc54a draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 8087490eb4dcaf4ead0f03eae4126780d21e5503
author | galaxyp |
---|---|
date | Fri, 06 Jul 2018 14:13:48 -0400 |
parents | d3fd539f477e |
children | d77c5228fd1a |
files | msi_preprocessing.xml test-data/preprocessing_results1.RData test-data/preprocessing_results1.pdf test-data/preprocessing_results1.txt test-data/preprocessing_results2.pdf test-data/preprocessing_results3.RData test-data/preprocessing_results3.pdf test-data/preprocessing_results4.RData test-data/preprocessing_results4.pdf test-data/preprocessing_results4.txt test-data/preprocessing_results5.RData test-data/preprocessing_results5.pdf |
diffstat | 12 files changed, 299 insertions(+), 292 deletions(-) [+] |
line wrap: on
line diff
--- a/msi_preprocessing.xml Thu Jun 21 16:45:55 2018 -0400 +++ b/msi_preprocessing.xml Fri Jul 06 14:13:48 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.3"> +<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.4"> <description> mass spectrometry imaging preprocessing </description> @@ -38,14 +38,16 @@ #if str($processed_cond.processed_file) == "processed": msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units") #else - msidata <- readImzML('infile') + msidata <- readImzML('infile', attach.only=TRUE) #end if #elif $infile.ext == 'analyze75' - msidata = readAnalyze('infile') + msidata = readAnalyze('infile', attach.only=TRUE) #else load('infile.RData') #end if +print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[])))) + ## function to later read RData reference files in loadRData <- function(fileName){ @@ -54,357 +56,358 @@ get(ls()[ls() != "fileName"]) } -######################### preparations for QC report ################# - - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0)) - medint = round(median(spectra(msidata)[]), digits=2) - TICs = round(mean(colSums(spectra(msidata)[])), digits=1) - QC_numbers= data.frame(inputdata = c(maxfeatures, medianpeaks, medint, TICs)) - vectorofactions = "inputdata" - -############################### Preprocessing steps ########################### -############################################################################### +if (sum(spectra(msidata)[]>0, na.rm=TRUE)> 0){ + ######################### preparations for QC report ################# -#for $method in $methods: - -############################### Normalization ########################### + maxfeatures = length(features(msidata)) + medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) + medint = round(median(spectra(msidata)[],na.rm=TRUE), digits=2) + TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) + QC_numbers= data.frame(inputdata = c(maxfeatures, medianpeaks, medint, TICs)) + vectorofactions = "inputdata" - #if str( $method.methods_conditional.preprocessing_method ) == 'Normalization': - print('Normalization') - ##normalization + ############################### Preprocessing steps ########################### + ############################################################################### - msidata = normalize(msidata, method="tic") + #for $method in $methods: - ############################### QC ########################### + ############################### Normalization ########################### - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0)) - medint = round(median(spectra(msidata)[]), digits=2) - TICs = round(mean(colSums(spectra(msidata)[])), digits=1) - normalized = c(maxfeatures, medianpeaks, medint, TICs) - QC_numbers= cbind(QC_numbers, normalized) - vectorofactions = append(vectorofactions, "normalized") + #if str( $method.methods_conditional.preprocessing_method ) == 'Normalization': + print('Normalization') + ##normalization -############################### Baseline reduction ########################### + msidata = normalize(msidata, method="tic") + + ############################### QC ########################### - #elif str( $method.methods_conditional.preprocessing_method ) == 'Baseline_reduction': - print('Baseline_reduction') - ##baseline reduction - - msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline) - - ############################### QC ########################### + maxfeatures = length(features(msidata)) + medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE),) + medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) + TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) + normalized = c(maxfeatures, medianpeaks, medint, TICs) + QC_numbers= cbind(QC_numbers, normalized) + vectorofactions = append(vectorofactions, "normalized") - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0)) - medint = round(median(spectra(msidata)[]), digits=2) - TICs = round(mean(colSums(spectra(msidata)[])), digits=1) - baseline= c(maxfeatures, medianpeaks, medint, TICs) - QC_numbers= cbind(QC_numbers, baseline) - vectorofactions = append(vectorofactions, "baseline red.") + ############################### Baseline reduction ########################### -############################### Smoothing ########################### + #elif str( $method.methods_conditional.preprocessing_method ) == 'Baseline_reduction': + print('Baseline_reduction') + ##baseline reduction - #elif str( $method.methods_conditional.preprocessing_method ) == 'Smoothing': - print('Smoothing') - ## Smoothing + msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline) - #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian': - print('gaussian smoothing') - - msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian) + ############################### QC ########################### - #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay': - print('sgolay smoothing') + maxfeatures = length(features(msidata)) + medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) + medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) + TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) + baseline= c(maxfeatures, medianpeaks, medint, TICs) + QC_numbers= cbind(QC_numbers, baseline) + vectorofactions = append(vectorofactions, "baseline red.") - msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters) - #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma': - print('sgolay smoothing') - - msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter) - - #end if - - ############################### QC ########################### + ############################### Smoothing ########################### - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0)) - medint = round(median(spectra(msidata)[]), digits=2) - TICs = round(mean(colSums(spectra(msidata)[])), digits=1) - smoothed= c(maxfeatures, medianpeaks, medint, TICs) - QC_numbers= cbind(QC_numbers, smoothed) - vectorofactions = append(vectorofactions, "smoothed") + #elif str( $method.methods_conditional.preprocessing_method ) == 'Smoothing': + print('Smoothing') + ## Smoothing -############################### Peak picking ########################### + #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian': + print('gaussian smoothing') - #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking': - print('Peak_picking') - ## Peakpicking + msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian) - #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive': - print('adaptive peakpicking') - - msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, spar=$method.methods_conditional.methods_for_picking.spar_picking) + #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay': + print('sgolay smoothing') - #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'limpic': - print('limpic peakpicking') - - msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, thresh=$method.methods_conditional.methods_for_picking.tresh_picking) + msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters) + #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma': + print('sgolay smoothing') - #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'simple': - print('simple peakpicking') - - msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method) + msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter) - #end if - - ############################### QC ########################### + #end if - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0)) - medint = round(median(spectra(msidata)[]), digits=2) - TICs = round(mean(colSums(spectra(msidata)[])), digits=1) - picked= c(maxfeatures, medianpeaks, medint, TICs) - QC_numbers= cbind(QC_numbers, picked) - vectorofactions = append(vectorofactions, "picked") + ############################### QC ########################### -############################### Peak alignment ########################### - - #elif str( $method.methods_conditional.preprocessing_method ) == 'Peak_alignment': - print('Peak_alignment') - ## Peakalignment + maxfeatures = length(features(msidata)) + medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) + medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) + TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) + smoothed= c(maxfeatures, medianpeaks, medint, TICs) + QC_numbers= cbind(QC_numbers, smoothed) + vectorofactions = append(vectorofactions, "smoothed") - #if str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_noref': + ############################### Peak picking ########################### - align_peak_reference = msidata - - #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_table': + #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking': + print('Peak_picking') + ## Peakpicking - align_reference_table = read.delim("$method.methods_conditional.align_ref_type.align_peaks_table", header = FALSE, stringsAsFactors = FALSE) - align_reference_column = align_reference_table[,$method.methods_conditional.align_ref_type.align_mass_column] - align_peak_reference = align_reference_column[align_reference_column>=min(mz(msidata)) & align_reference_column<=max(mz(msidata))] - if (length(align_peak_reference) == 0) - {align_peak_reference = 0} - - #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_msidata_ref': - align_peak_reference = loadRData('$method.methods_conditional.align_ref_type.align_peaks_msidata') + ## remove duplicated coordinates, otherwise peak picking will fail + print(paste0(sum(duplicated(coord(msidata))), " coordinates were removed")) + msidata <- msidata[,!duplicated(coord(msidata))] - #end if + #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive': + print('adaptive peakpicking') - #if str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'diff': - print('diff peakalignment') + msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, spar=$method.methods_conditional.methods_for_picking.spar_picking) - msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',diff.max =$method.methods_conditional.methods_for_alignment.value_diffalignment, units = "$method.methods_conditional.methods_for_alignment.units_diffalignment", ref=align_peak_reference) + #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'limpic': + print('limpic peakpicking') - #elif str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'DP': - print('DPpeakalignment') + msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, thresh=$method.methods_conditional.methods_for_picking.tresh_picking) - msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',gap = $method.methods_conditional.methods_for_alignment.gap_DPalignment, ref=align_peak_reference) - - #end if - - ############################### QC ########################### + #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'simple': + print('simple peakpicking') - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0)) - medint = round(median(spectra(msidata)[]), digits=2) - TICs = round(mean(colSums(spectra(msidata)[])), digits=1) - aligned= c(maxfeatures, medianpeaks, medint, TICs) - QC_numbers= cbind(QC_numbers, aligned) - vectorofactions = append(vectorofactions, "aligned") + msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method) -############################### Peak filtering ########################### + #end if + + ############################### QC ########################### - #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_filtering': - print('Peak_filtering') - - msidata = peakFilter(msidata, method='freq', freq.min = $method.methods_conditional.frequ_filtering) - - ############################### QC ########################### + maxfeatures = length(features(msidata)) + medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) + medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) + TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) + picked= c(maxfeatures, medianpeaks, medint, TICs) + QC_numbers= cbind(QC_numbers, picked) + vectorofactions = append(vectorofactions, "picked") - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0)) - medint = round(median(spectra(msidata)[]), digits=2) - TICs = round(mean(colSums(spectra(msidata)[])), digits=1) - filtered= c(maxfeatures, medianpeaks, medint, TICs) - QC_numbers= cbind(QC_numbers, filtered) - vectorofactions = append(vectorofactions, "filtered") + ############################### Peak alignment ########################### -############################### Data reduction ########################### - - #elif str( $method.methods_conditional.preprocessing_method) == 'Data_reduction': - print('Data_reduction') + #elif str( $method.methods_conditional.preprocessing_method ) == 'Peak_alignment': + print('Peak_alignment') + ## Peakalignment - #if str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'bin': - print('bin reduction') + #if str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_noref': - msidata = reduceDimension(msidata, method="bin", width=$method.methods_conditional.methods_for_reduction.bin_width, units="$method.methods_conditional.methods_for_reduction.bin_units", fun=$method.methods_conditional.methods_for_reduction.bin_fun) + align_peak_reference = msidata - #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'resample': - print('resample reduction') + #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_table': - msidata = reduceDimension(msidata, method="resample", step=$method.methods_conditional.methods_for_reduction.resample_step) - - #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'peaks': - print('peaks reduction') - - #if str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'table': + align_reference_table = read.delim("$method.methods_conditional.align_ref_type.align_peaks_table", header = FALSE, stringsAsFactors = FALSE) + align_reference_column = align_reference_table[,$method.methods_conditional.align_ref_type.align_mass_column] + align_peak_reference = align_reference_column[align_reference_column>=min(mz(msidata)) & align_reference_column<=max(mz(msidata))] + if (length(align_peak_reference) == 0) + {align_peak_reference = 0} + + #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_msidata_ref': - reference_table = read.delim("$method.methods_conditional.methods_for_reduction.ref_type.peaks_table", header = FALSE, stringsAsFactors = FALSE) - reference_column = reference_table[,$method.methods_conditional.methods_for_reduction.ref_type.mass_column] - peak_reference = reference_column[reference_column>min(mz(msidata)) & reference_column<max(mz(msidata))] - - #elif str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'msidata_ref': - - peak_reference = loadRData('$method.methods_conditional.methods_for_reduction.ref_type.peaks_msidata') + align_peak_reference = loadRData('$method.methods_conditional.align_ref_type.align_peaks_msidata') #end if - msidata = reduceDimension(msidata, method="peaks", ref=peak_reference, type="$method.methods_conditional.methods_for_reduction.peaks_type") - #end if - ############################### QC ########################### + #if str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'diff': + print('diff peakalignment') + + msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',diff.max =$method.methods_conditional.methods_for_alignment.value_diffalignment, units = "$method.methods_conditional.methods_for_alignment.units_diffalignment", ref=align_peak_reference) + + #elif str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'DP': + print('DPpeakalignment') + + msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',gap = $method.methods_conditional.methods_for_alignment.gap_DPalignment, ref=align_peak_reference) + + #end if + + ############################### QC ########################### - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0)) - medint = round(median(spectra(msidata)[]), digits=2) - TICs = round(mean(colSums(spectra(msidata)[])), digits=1) - reduced= c(maxfeatures, medianpeaks, medint, TICs) - QC_numbers= cbind(QC_numbers, reduced) - vectorofactions = append(vectorofactions, "reduced") + maxfeatures = length(features(msidata)) + medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) + medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) + TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) + aligned= c(maxfeatures, medianpeaks, medint, TICs) + QC_numbers= cbind(QC_numbers, aligned) + vectorofactions = append(vectorofactions, "aligned") - ############################### Transformation ########################### + ############################### Peak filtering ########################### - #elif str( $method.methods_conditional.preprocessing_method) == 'Transformation': - print('Transformation') + #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_filtering': + print('Peak_filtering') - #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2': - print('log2 transformation') + msidata = peakFilter(msidata, method='freq', freq.min = $method.methods_conditional.frequ_filtering) + + ############################### QC ########################### - spectra(msidata)[][spectra(msidata)[] ==0] = NA - print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra(msidata)[])))) - spectra(msidata)[] = log2(spectra(msidata)[]) + maxfeatures = length(features(msidata)) + medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) + medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) + TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) + filtered= c(maxfeatures, medianpeaks, medint, TICs) + QC_numbers= cbind(QC_numbers, filtered) + vectorofactions = append(vectorofactions, "filtered") - #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt': - print('squareroot transformation') + ############################### Data reduction ########################### + + #elif str( $method.methods_conditional.preprocessing_method) == 'Data_reduction': + print('Data_reduction') - spectra(msidata)[] = sqrt(spectra(msidata)[]) + #if str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'bin': + print('bin reduction') + + msidata = reduceDimension(msidata, method="bin", width=$method.methods_conditional.methods_for_reduction.bin_width, units="$method.methods_conditional.methods_for_reduction.bin_units", fun=$method.methods_conditional.methods_for_reduction.bin_fun) - #end if + #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'resample': + print('resample reduction') + + msidata = reduceDimension(msidata, method="resample", step=$method.methods_conditional.methods_for_reduction.resample_step) - ############################### QC ########################### + #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'peaks': + print('peaks reduction') + + #if str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'table': - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0), na.rm=TRUE) - medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - TICs = round(mean(colSums(spectra(msidata)[]), na.rm=TRUE), digits=1) - transformed= c(maxfeatures, medianpeaks, medint, TICs) - QC_numbers= cbind(QC_numbers, transformed) - vectorofactions = append(vectorofactions, "transformed") + reference_table = read.delim("$method.methods_conditional.methods_for_reduction.ref_type.peaks_table", header = FALSE, stringsAsFactors = FALSE) + reference_column = reference_table[,$method.methods_conditional.methods_for_reduction.ref_type.mass_column] + peak_reference = reference_column[reference_column>min(mz(msidata)) & reference_column<max(mz(msidata))] + + #elif str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'msidata_ref': + + peak_reference = loadRData('$method.methods_conditional.methods_for_reduction.ref_type.peaks_msidata') + + #end if - #end if -#end for + msidata = reduceDimension(msidata, method="peaks", ref=peak_reference, type="$method.methods_conditional.methods_for_reduction.peaks_type") + #end if + ############################### QC ########################### + + maxfeatures = length(features(msidata)) + medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) + medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) + TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) + reduced= c(maxfeatures, medianpeaks, medint, TICs) + QC_numbers= cbind(QC_numbers, reduced) + vectorofactions = append(vectorofactions, "reduced") + + ############################### Transformation ########################### -############# Outputs: summar matrix, RData, tabular and QC report ############# -################################################################################ -## optional summarized matrix - print('Summarized matrix') + #elif str( $method.methods_conditional.preprocessing_method) == 'Transformation': + print('Transformation') + + #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2': + print('log2 transformation') -#if "mean" in str($summary_type).split(","): - print("mean matrix") - if (!is.null(levels(msidata\$combined_sample))){ + spectra(msidata)[][spectra(msidata)[] ==0] = NA + print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra(msidata)[])))) + spectra(msidata)[] = log2(spectra(msidata)[]) + + #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt': + print('squareroot transformation') + + spectra(msidata)[] = sqrt(spectra(msidata)[]) + + #end if + + ############################### QC ########################### - sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) - count = 1 - for (subsample in levels(msidata\$combined_sample)){ - subsample_pixels = msidata[,msidata\$combined_sample == subsample] - subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE) - sample_matrix = cbind(sample_matrix, subsample_calc) - count = count+1 - } - rownames(sample_matrix) = mz(msidata) - colnames(sample_matrix) = levels(msidata\$combined_sample) - write.table(sample_matrix, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") - }else{ - full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE)) - rownames(full_sample_calc) = mz(msidata) - colnames(full_sample_calc) = "$infile.display_name" - write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") - } + maxfeatures = length(features(msidata)) + medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) + medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) + TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) + transformed= c(maxfeatures, medianpeaks, medint, TICs) + QC_numbers= cbind(QC_numbers, transformed) + vectorofactions = append(vectorofactions, "transformed") + + #end if + #end for + + ############# Outputs: summar matrix, RData, tabular and QC report ############# + ################################################################################ + ## optional summarized matrix + print('Summarized matrix') -#end if + #if "mean" in str($summary_type).split(","): + print("mean matrix") + if (!is.null(levels(msidata\$combined_sample))){ -#if "median" in str($summary_type).split(","): - print("median matrix") - if (!is.null(levels(msidata\$combined_sample))){ - sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) - count = 1 - for (subsample in levels(msidata\$combined_sample)){ - subsample_pixels = msidata[,msidata\$combined_sample == subsample] - subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE) - sample_matrix = cbind(sample_matrix, subsample_calc) - count = count+1 + sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) + count = 1 + for (subsample in levels(msidata\$combined_sample)){ + subsample_pixels = msidata[,msidata\$combined_sample == subsample] + subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE) + sample_matrix = cbind(sample_matrix, subsample_calc) + count = count+1 + } + rownames(sample_matrix) = mz(msidata) + colnames(sample_matrix) = levels(msidata\$combined_sample) + write.table(sample_matrix, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") + }else{ + full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE)) + rownames(full_sample_calc) = mz(msidata) + colnames(full_sample_calc) = "$infile.display_name" + write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") } - rownames(sample_matrix) = mz(msidata) - colnames(sample_matrix) = levels(msidata\$combined_sample) - write.table(sample_matrix, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") - }else{ - full_sample_calc = apply(spectra(msidata)[],1,median, na.rm=TRUE) - rownames(full_sample_calc) = mz(msidata) - colnames(full_sample_calc) = "$infile.display_name" - write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") - } -#end if + #end if -#if "sd" in str($summary_type).split(","): - print("sd matrix") - if (!is.null(levels(msidata\$combined_sample))){ - sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) - count = 1 - for (subsample in levels(msidata\$combined_sample)){ - subsample_pixels = msidata[,msidata\$combined_sample == subsample] - subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE) - sample_matrix = cbind(sample_matrix, subsample_calc) - count = count+1 + #if "median" in str($summary_type).split(","): + print("median matrix") + if (!is.null(levels(msidata\$combined_sample))){ + sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) + count = 1 + for (subsample in levels(msidata\$combined_sample)){ + subsample_pixels = msidata[,msidata\$combined_sample == subsample] + subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE) + sample_matrix = cbind(sample_matrix, subsample_calc) + count = count+1 + } + + rownames(sample_matrix) = mz(msidata) + colnames(sample_matrix) = levels(msidata\$combined_sample) + write.table(sample_matrix, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") + }else{ + full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,median, na.rm=TRUE)) + rownames(full_sample_calc) = mz(msidata) + colnames(full_sample_calc) = "$infile.display_name" + write.table(full_sample_calc, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") } + #end if - rownames(sample_matrix) = mz(msidata) - colnames(sample_matrix) = levels(msidata\$combined_sample) - write.table(sample_matrix, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") - }else{ - full_sample_calc = apply(spectra(msidata)[],1,sd, na.rm=TRUE) - rownames(full_sample_calc) = mz(msidata) - colnames(full_sample_calc) = "$infile.display_name" - write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") - } -#end if + #if "sd" in str($summary_type).split(","): + print("sd matrix") + if (!is.null(levels(msidata\$combined_sample))){ + sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) + count = 1 + for (subsample in levels(msidata\$combined_sample)){ + subsample_pixels = msidata[,msidata\$combined_sample == subsample] + subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE) + sample_matrix = cbind(sample_matrix, subsample_calc) + count = count+1 + } -## save as (.RData) -save(msidata, file="$msidata_preprocessed") - -print(paste0("Number of NAs in intensity matrix: ", sum(is.na(spectra(msidata)[])))) + rownames(sample_matrix) = mz(msidata) + colnames(sample_matrix) = levels(msidata\$combined_sample) + write.table(sample_matrix, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") + }else{ -## save output matrix -#if $output_matrix: + full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,sd, na.rm=TRUE)) + rownames(full_sample_calc) = mz(msidata) + colnames(full_sample_calc) = "$infile.display_name" + write.table(full_sample_calc, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") + } + #end if + print(paste0("Number of NA in output file: ",sum(is.na(spectra(msidata)[])))) + + ## save as (.RData) + save(msidata, file="$msidata_preprocessed") - if (length(features(msidata))> 0) - { - ## save as intensity matrix - spectramatrix = spectra(msidata)[] - rownames(spectramatrix) = mz(msidata) - newmatrix = rbind(pixels(msidata), spectramatrix) - write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") + ## save output matrix + #if $output_matrix: + if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){ + spectramatrix = spectra(msidata)[] + spectramatrix = cbind(mz(msidata),spectramatrix) + newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix) + write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") }else{ - print("file has no features left") - write.table(matrix(rownames(coord(msidata)), ncol=ncol(msidata), nrow=1), file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") + print("file has no features or pixels left") } + #end if -#end if - -## save QC report + ## save QC report pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12) plot(0,type='n',axes=FALSE,ann=FALSE) @@ -413,10 +416,14 @@ grid.table(t(QC_numbers)) dev.off() +}else{ + print("inputfile has no intensities > 0") +} + ]]></configfile> </configfiles> <inputs> - <param name="infile" type="data" format="imzml,rdata,danalyze75" + <param name="infile" type="data" format="imzml,rdata,analyze75" label="MSI data as imzml, analyze7.5 or Cardinal MSImageSet saved as RData" help="load imzml and ibd file by uploading composite datatype imzml"/> <conditional name="processed_cond"> @@ -610,7 +617,7 @@ </inputs> <outputs> <data format="rdata" name="msidata_preprocessed" label="$infile.display_name preprocessed"/> - <data format="pdf" name="QC_plots" from_work_dir="Preprocessing.pdf" label = "$infile.display_name preprocessed_QC"/> + <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "$infile.display_name preprocessed_QC"/> <data format="tabular" name="summarized_output_mean" label="$infile.display_name mean_matrix"> <filter>summary_type and "mean" in summary_type</filter> </data> @@ -678,7 +685,7 @@ <param name="output_matrix" value="True"/> <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/> <output name="matrixasoutput" file="preprocessing_results1.txt"/> - <output name="QC_plots" file="preprocessing_results1.pdf" compare="sim_size"/> + <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/> </test> <test expect_num_outputs="4"> <param name="infile" value="123_combined.RData" ftype="rdata"/> @@ -705,7 +712,7 @@ <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/> <output name="summarized_output_median" file="preprocessing_median2.txt" lines_diff="2"/> <output name="summarized_output_sd" file="preprocessing_sd2.txt" lines_diff="2"/> - <output name="QC_plots" file="preprocessing_results2.pdf" compare="sim_size"/> + <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/> </test> <test expect_num_outputs="3"> <param name="infile" value="" ftype="analyze75"> @@ -736,7 +743,7 @@ </repeat> <param name="summary_type" value="mean"/> <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/> - <output name="QC_plots" file="preprocessing_results3.pdf" compare="sim_size"/> + <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/> <output name="summarized_output_mean" file="preprocessing_mean3.txt" lines_diff="2"/> </test> <test expect_num_outputs="3"> @@ -759,7 +766,7 @@ <param name="output_matrix" value="True"/> <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/> <output name="matrixasoutput" file="preprocessing_results4.txt"/> - <output name="QC_plots" file="preprocessing_results4.pdf" compare="sim_size"/> + <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/> </test> <test expect_num_outputs="2"> <param name="infile" value="" ftype="imzml"> @@ -776,7 +783,7 @@ </conditional> </repeat> <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size"/> - <output name="QC_plots" file="preprocessing_results5.pdf" compare="sim_size"/> + <output name="QC_overview" file="preprocessing_results5.pdf" compare="sim_size"/> </test> </tests> <help>
--- a/test-data/preprocessing_results1.txt Thu Jun 21 16:45:55 2018 -0400 +++ b/test-data/preprocessing_results1.txt Fri Jul 06 14:13:48 2018 -0400 @@ -1,3 +1,3 @@ - x = 1, y = 1 x = 2, y = 1 x = 3, y = 1 x = 1, y = 2 x = 2, y = 2 x = 3, y = 2 x = 1, y = 3 x = 2, y = 3 x = 3, y = 3 +mz | spectra x = 1, y = 1 x = 2, y = 1 x = 3, y = 1 x = 1, y = 2 x = 2, y = 2 x = 3, y = 2 x = 1, y = 3 x = 2, y = 3 x = 3, y = 3 329 8.48069807321137 6.00276368862812 0 0 7.22240715797167 6.68463797360356 0 0 0 345 0 0 4.70593890744759 0 0 0 5.23000350586712 4.17949067812964 5.08555910047608
--- a/test-data/preprocessing_results4.txt Thu Jun 21 16:45:55 2018 -0400 +++ b/test-data/preprocessing_results4.txt Fri Jul 06 14:13:48 2018 -0400 @@ -1,4 +1,4 @@ - x = 1, y = 1 x = 2, y = 1 x = 3, y = 1 x = 1, y = 2 x = 2, y = 2 x = 3, y = 2 x = 1, y = 3 x = 2, y = 3 x = 3, y = 3 +mz | spectra x = 1, y = 1 x = 2, y = 1 x = 3, y = 1 x = 1, y = 2 x = 2, y = 2 x = 3, y = 2 x = 1, y = 3 x = 2, y = 3 x = 3, y = 3 1199 1.90173968313755 1.13259535967648 2.08382650993109 2.34349737625869 1.33087314662273 2.14468085106383 3.43161925601751 1.32706902782797 2.22480967308554 1200 1.39388874502695 0.970046951574763 1.52152411836238 1.35619061126081 1.10906095551895 1.66382978723404 2.22846006564551 1.19804842790025 1.7089117778773 1201 1.13095882671438 0.99102093971692 1.23623834616944 1.19344773790952 1.05864909390445 1.31063829787234 1.67396061269147 1.07824358511023 1.28168383340797 @@ -157,4 +157,4 @@ 1354 1.06362120618167 1.11618093417392 1.04838476586595 1.07551811953901 1.02051142468305 1.08344125809436 1.09535248786985 1.11069907763863 1.07245078759322 1355 1.17876426640757 1.01819178799199 1.08701772487747 1.1007336161215 1.0558993559982 1.09555125725338 1.09948776606326 1.05059631369714 1.03765826649839 1356 1.25341933661339 1.11461765555463 1.12696739822804 1.128350588569 1.02840197693575 1.09422492401216 1.08807439824945 1.10589085652331 0.953489859893801 -1357 NA NA NA NA NA NA NA NA NA +1357 NaN NaN NaN NaN NaN NaN NaN NaN NaN