Mercurial > repos > galaxyp > cardinal_preprocessing
view preprocessing.xml @ 15:accf9fb6ea01 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit c8d3adac445b4e08e2724e22d7201bfc38bbf40f"
author | galaxyp |
---|---|
date | Sun, 29 Aug 2021 07:32:56 +0000 |
parents | 6b36be80febb |
children | 611d80c0e29d |
line wrap: on
line source
<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.0">
    <description>
        mass spectrometry imaging preprocessing
    </description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements">
        <requirement type="package" version="2.3">r-gridextra</requirement>
        <requirement type="package" version="3.3.5">r-ggplot2</requirement>
    </expand>
    <!-- Prints the rendered R script to stdout, runs it, then moves the exported
         out.imzML / out.ibd pair into the extra-files directory of the imzML
         output and writes a listing of that directory into the output file itself.
         Each mv is piped into "true", so the exit status of the mv is discarded
         and the chain continues even when no export was written. -->
    <command detect_errors="exit_code">
    <![CDATA[

        @INPUT_LINKING@
        cat '${cardinal_preprocessing}' &&
        Rscript '${cardinal_preprocessing}' &&
        mkdir $outfile_imzml.files_path &&
        mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
        mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true &&
        echo "imzML file:" > $outfile_imzml &&
        ls -l "$outfile_imzml.files_path" >> $outfile_imzml
    ]]>
    </command>
    <configfiles>
        <configfile name="cardinal_preprocessing"><![CDATA[

################################# load libraries and read file #################

## Number of workers handed to MulticoreParam() by every process() call below.
## Use the slot count exported by Galaxy when it is set to a usable number,
## otherwise fall back to a single worker.
galaxy_slots = suppressWarnings(as.numeric(Sys.getenv("GALAXY_SLOTS")))
if (is.na(galaxy_slots) || galaxy_slots < 1) {
    number_cpu = 1 ## default = 1
}else{
    number_cpu = galaxy_slots ##cpu set by Galaxy
}

library(Cardinal)
library(gridExtra)
library(ggplot2)

## NOTE(review): token presumably defined in macros.xml; the code below relies on
## it to create the object msidata - confirm against the macro.
@READING_MSIDATA_FULLY_COMPATIBLE@

## remove duplicated coordinates, otherwise peak picking and log2 transformation will fail

## set variable to False
#set $used_peak_picking = False
#set $used_peak_alignment = False
#set $continuous_format = False

## Only continue when the dataset has at least one spectrum and one m/z feature.
if (ncol(msidata)>0 && nrow(msidata) >0){

    ## start QC report
    pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12)
    plot(0,type='n',axes=FALSE,ann=FALSE)
    title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name"))

    ######################### preparations for QC report #################

        ## First column of the QC table: m/z range, feature count and spectra
        ## count of the untouched input data.
        maxfeatures =nrow(msidata)
        pixelcount = ncol(msidata)
        minmz = round(min(mz(msidata)), digits=2)
        maxmz = round(max(mz(msidata)), digits=2)

        QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, pixelcount))
        vectorofactions = "inputdata"

    ##
## Choose random spectra for QC plots
## The same pixels are re-plotted after every preprocessing step. The sample
## size is capped at the number of spectra so that datasets with fewer than
## four pixels do not make sample() fail.
    random_spectra = sample(pixels(msidata), min(4, ncol(msidata)), replace=FALSE)
    par(oma=c(0,0,2,0))
    print(plot(msidata, pixel=random_spectra, col="black"))
    title("Input spectra", outer=TRUE, line=0)

    ############################### Preprocessing steps ###########################
    ###############################################################################

    ## One pass per entry of the "methods" repeat, in the order given by the user.
    #for $method in $methods:

    ############################### Normalization ###########################

        #if str( $method.methods_conditional.preprocessing_method ) == 'Normalization':
            print('Normalization')
            ##normalization

            msidata = normalize(msidata, method="$method.methods_conditional.methods_for_normalization.normalization_method")
            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ############################### QC ###########################

            ## Append one column with the current m/z range, feature count and
            ## spectra count to the QC table and plot the sampled spectra again.
            maxfeatures =nrow(msidata)
            pixelcount = ncol(msidata)
            minmz = round(min(mz(msidata)), digits=2)
            maxmz = round(max(mz(msidata)), digits=2)
            normalized = c(minmz, maxmz,maxfeatures, pixelcount)
            QC_numbers= cbind(QC_numbers, normalized)
            vectorofactions = append(vectorofactions, "normalized")
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after normalization", outer=TRUE, line=0)

    ############################### Baseline reduction ###########################

        #elif str( $method.methods_conditional.preprocessing_method ) == 'Baseline_reduction':
            print('Baseline_reduction')
            ##baseline reduction

            msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline, spar=$method.methods_conditional.spar_baseline)
            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ############################### QC ###########################

            maxfeatures =nrow(msidata)
            pixelcount = ncol(msidata)
            minmz = round(min(mz(msidata)), digits=2)
            maxmz = round(max(mz(msidata)), digits=2)
            baseline = c(minmz, maxmz,maxfeatures, pixelcount)
            QC_numbers= cbind(QC_numbers, baseline)
            vectorofactions =
append(vectorofactions, "baseline red.") print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after baseline reduction", outer=TRUE, line=0) ############################### Smoothing ########################### #elif str( $method.methods_conditional.preprocessing_method ) == 'Smoothing': print('Smoothing') ## Smoothing #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian': print('gaussian smoothing') msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian) msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay': print('sgolay smoothing') msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters) msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) ## if selected replace negative intensities with zero #if $method.methods_conditional.methods_for_smoothing.replace_negatives: ## bring spectra matrix to disk spectra_df = as.matrix(spectra(msidata)) spectra_df[spectra_df<0] = 0 spectra(msidata) = spectra_df #end if #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma': print('moving average smoothing') msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter) msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) #end if ############################### QC ########################### maxfeatures =nrow(msidata) 
pixelcount = ncol(msidata) minmz = round(min(mz(msidata)), digits=2) maxmz = round(max(mz(msidata)), digits=2) smoothed = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, smoothed) vectorofactions = append(vectorofactions, "smoothed") print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after smoothing", outer=TRUE, line=0) ############################### Mz alignment ########################### #elif str( $method.methods_conditional.preprocessing_method ) == 'mz_alignment': print('m/z alignment') ## M/z alignment #if str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_table': reference_mz = read.delim("$method.methods_conditional.mzalign_ref_type.mz_tabular", header = $method.methods_conditional.mzalign_ref_type.feature_header, stringsAsFactors = FALSE) reference_mz = reference_mz[,$method.methods_conditional.mzalign_ref_type.feature_column] msidata = mzAlign(msidata, ref=reference_mz, tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span) #elif str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_noref': msidata = mzAlign(msidata,tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span) #end if msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) ############################### QC ########################### maxfeatures =nrow(msidata) pixelcount = ncol(msidata) minmz = round(min(mz(msidata)), digits=2) maxmz = round(max(mz(msidata)), digits=2) mz_aligned = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, mz_aligned) vectorofactions = append(vectorofactions, "mz aligned") print(plot(msidata, pixel=random_spectra, col="black")) 
title("Spectra after m/z alignment", outer=TRUE, line=0) ############################### Mz recalibration ########################### #elif str( $method.methods_conditional.preprocessing_method ) == 'mz_recalibration': print('m/z recalibration') ## M/z recalibration reference_mz = read.delim("$method.methods_conditional.mz_tabular", header = $method.methods_conditional.feature_header, stringsAsFactors = FALSE) reference_mz = reference_mz[,$method.methods_conditional.feature_column] msidata = mzAlign(msidata, ref=reference_mz, tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span) msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) ## remove the reference peaks data to allow proper peak alignment afterwards metadata(featureData(msidata))['reference peaks'] <- NULL ############################### QC ########################### maxfeatures =nrow(msidata) pixelcount = ncol(msidata) minmz = round(min(mz(msidata)), digits=2) maxmz = round(max(mz(msidata)), digits=2) mz_recal = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, mz_recal) vectorofactions = append(vectorofactions, "mz recalibrated") print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after m/z recalibration", outer=TRUE, line=0) ############################### Peak picking ########################### #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking': #set $used_peak_picking = True print('Peak_picking') ## Peakpicking #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive': print('adaptive peakpicking') msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', 
SNR=$method.methods_conditional.SNR_picking_method, spar=$method.methods_conditional.methods_for_picking.spar_picking) #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'mad': print('mad peakpicking') msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method) #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'simple': print('simple peakpicking') msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method) #end if msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) ############################### QC ########################### maxfeatures =nrow(msidata) pixelcount = ncol(msidata) minmz = round(min(mz(msidata)), digits=2) maxmz = round(max(mz(msidata)), digits=2) picked = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, picked) vectorofactions = append(vectorofactions, "picked") print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after peak picking", outer=TRUE, line=0) ############################### Peak alignment ########################### #elif str( $method.methods_conditional.preprocessing_method ) == 'Peak_alignment': #set $used_peak_alignment = True print('Peak_alignment') ## Peakalignment #if str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_table': align_reference_table = read.delim("$method.methods_conditional.align_ref_type.mz_tabular", header = $method.methods_conditional.align_ref_type.feature_header, stringsAsFactors = FALSE) align_reference_column = 
align_reference_table[,$method.methods_conditional.align_ref_type.feature_column] align_peak_reference = as.numeric(align_reference_column[align_reference_column>=min(mz(msidata)) & align_reference_column<=max(mz(msidata))]) if (length(align_peak_reference) == 0) {align_peak_reference = 0} msidata = peakAlign(msidata,tolerance =$method.methods_conditional.value_diffalignment, units = "$method.methods_conditional.units_diffalignment", ref=align_peak_reference) #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_noref': msidata = peakAlign(msidata,tolerance =$method.methods_conditional.value_diffalignment, units = "$method.methods_conditional.units_diffalignment") #end if msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) ############################### QC ########################### maxfeatures =nrow(msidata) pixelcount = ncol(msidata) minmz = round(min(mz(msidata)), digits=2) maxmz = round(max(mz(msidata)), digits=2) aligned = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, aligned) vectorofactions = append(vectorofactions, "aligned") print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after alignment", outer=TRUE, line=0) ############################### Peak filtering ########################### #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_filtering': print('Peak_filtering') msidata = peakFilter(msidata, freq.min = $method.methods_conditional.frequ_filtering) msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) ############################### QC ########################### maxfeatures =nrow(msidata) pixelcount = ncol(msidata) minmz = round(min(mz(msidata)), digits=2) maxmz = round(max(mz(msidata)), digits=2) filtered = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, filtered) vectorofactions = append(vectorofactions, "filtered") print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra 
after filtering", outer=TRUE, line=0) ############################### Peak binning ########################### #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_binning': print('Peak_binning') ## reading reference file reference_table = read.delim("$method.methods_conditional.mz_tabular", header = $method.methods_conditional.feature_header, stringsAsFactors = FALSE) reference_column = reference_table[,$method.methods_conditional.feature_column] peak_reference = reference_column[reference_column>min(mz(msidata)) & reference_column<max(mz(msidata))] msidata = peakBin(msidata, ref = peak_reference, tolerance = $method.methods_conditional.peakbin_tol, units = "$method.methods_conditional.peakbin_units", type="$method.methods_conditional.peaks_type") msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) ############################### QC ########################### maxfeatures =nrow(msidata) pixelcount = ncol(msidata) minmz = round(min(mz(msidata)), digits=2) maxmz = round(max(mz(msidata)), digits=2) peak_binned = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, peak_binned) vectorofactions = append(vectorofactions, "peak binned") print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after peak binning", outer=TRUE, line=0) ############################### Mass binning ########################### #elif str( $method.methods_conditional.preprocessing_method) == 'Mass_binning': print('mass binning') #if str($method.methods_conditional.mz_range.features_filtering) == 'change_mz_range': msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, from=$method.methods_conditional.mz_range.min_mz, to=$method.methods_conditional.mz_range.max_mz, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun") #elif str($method.methods_conditional.mz_range.features_filtering) == 'none': msidata = mse_bin = 
mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun")

            #elif str($method.methods_conditional.mz_range.features_filtering) == 'bin_to_reference':

                ## Bin to the m/z values read from the selected column of the
                ## reference tabular file.
                bin_reference_mz = read.delim("$method.methods_conditional.mz_range.mz_tabular", header = $method.methods_conditional.mz_range.feature_header, stringsAsFactors = FALSE)
                bin_reference_mz = bin_reference_mz[,$method.methods_conditional.mz_range.feature_column]

                ## The aggregation function name is passed as a quoted string,
                ## the same way the change_mz_range branch passes it.
                msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun", ref=bin_reference_mz)

            #end if

            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ## optional: replace NA with 0
            #if $method.methods_conditional.replace_NA_bin:
                ## count and replace NAs
                print(paste0("Number of NA that were set to zero after binning:",sum(is.na(spectra(msidata)))))
                spectra(msidata)[is.na(spectra(msidata))] = 0
            #end if

        ############################### QC ###########################

            ## Append the post-binning m/z range, feature count and spectra
            ## count to the QC table and plot the sampled spectra again.
            maxfeatures =nrow(msidata)
            pixelcount = ncol(msidata)
            minmz = round(min(mz(msidata)), digits=2)
            maxmz = round(max(mz(msidata)), digits=2)
            reduced = c(minmz, maxmz,maxfeatures, pixelcount)
            QC_numbers= cbind(QC_numbers, reduced)
            vectorofactions = append(vectorofactions, "reduced")
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after m/z binning", outer=TRUE, line=0)

        ############################### Transformation ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'Transformation':
            print('Transformation')

            ## Convert processed-type data to the continuous class before the
            ## intensity matrix is modified. is() also matches subclasses,
            ## unlike a string comparison on class().
            if (is(msidata, "MSProcessedImagingExperiment")){
                msidata = as(msidata, "MSContinuousImagingExperiment")
            }

            #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2':
                print('log2 transformation')

                ## replace 0 with NA to prevent Inf
                spectra_df = as.matrix(spectra(msidata)) ## convert into R matrix
spectra_df[spectra_df ==0] = NA print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra_df)))) spectra(msidata) = spectra_df ## log transformation spectra(msidata) = log2(spectra(msidata)) ## optional: replace NA with 0 #if $method.methods_conditional.transf_conditional.replace_NA_trans: spectra(msidata)[is.na(spectra(msidata))] = 0 #end if #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt': print('squareroot transformation') spectra(msidata) = sqrt(spectra(msidata)) #end if ############################### QC ########################### maxfeatures =nrow(msidata) pixelcount = ncol(msidata) minmz = round(min(mz(msidata)), digits=2) maxmz = round(max(mz(msidata)), digits=2) transformed = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, transformed) vectorofactions = append(vectorofactions, "transformed") print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after transformation", outer=TRUE, line=0) #end if #end for ############# Outputs: RData, imzml and QC report ############# ################################################################################ ## save msidata as imzML file, will only work if there is at least 1 m/z left if (nrow(msidata) > 0){ ## make sure that coordinates are integers coord(msidata)\$y = as.integer(coord(msidata)\$y) coord(msidata)\$x = as.integer(coord(msidata)\$x) ## only continuous files can currently be exported msidata = as(msidata, "MSContinuousImagingExperiment") writeImzML(msidata, "out") } plot(0,type='n',axes=FALSE,ann=FALSE) rownames(QC_numbers) = c("min m/z", "max mz", "# features", "# spectra") grid.table(t(QC_numbers)) dev.off() }else{ print("inputfile has no intensities > 0") } ]]></configfile> </configfiles> <inputs> <expand macro="reading_msidata"/> <repeat name="methods" title="Preprocessing" min="1" max="50"> <conditional name="methods_conditional"> <param name="preprocessing_method" type="select" label="Preprocessing methods"> 
<option value="Normalization" selected="True">Intensity Normalization</option> <option value="Baseline_reduction">Baseline Reduction</option> <option value="Smoothing">Peak smoothing</option> <option value="mz_alignment">m/z alignment</option> <option value="mz_recalibration">m/z recalibration</option> <option value="Peak_picking">Peak picking</option> <option value="Peak_alignment">Peak alignment</option> <option value="Peak_filtering">Peak filtering</option> <option value="Peak_binning">Peak binning to reference peaks</option> <option value="Mass_binning">m/z binning</option> <option value="Transformation">Transformation</option> </param> <when value="Normalization"> <conditional name="methods_for_normalization"> <param name="normalization_method" type="select" label="Normalization method"> <option value="tic" selected="True">TIC</option> <option value="rms">RMS</option> </param> <when value="tic"/> <when value="rms"/> </conditional> </when> <when value="Baseline_reduction"> <param name="blocks_baseline" type="integer" value="500" label="Blocks"/> <param name="spar_baseline" type="float" value="1.0" label="Spar value" help="Smoothing parameter for the spline smoothing applied to the spectrum in order to decide the cutoffs for throwing away false noise spikes that might occur inside peaks"/> </when> <when value="Smoothing"> <conditional name="methods_for_smoothing"> <param name="smoothing_method" type="select" label="Smoothing method"> <option value="gaussian" selected="True">gaussian</option> <option value="sgolay">Savitsky-Golay</option> <option value="ma">moving average</option> </param> <when value="gaussian"> <param name="sd_gaussian" type="float" value="2" label="The standard deviation for the Gaussian kernel. 
Default = window/4"/> </when> <when value="sgolay"> <param name="order_of_filters" type="integer" value="3" label="The order of the smoothing filter, must be smaller than window size"/> <param name="replace_negatives" type="boolean" label="Replace negative intensities with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="Savitzky golay smoothing can introduce negative intensity values it is recommended to replace them with zero"/> </when> <when value="ma"> <param name="coefficients_ma_filter" type="float" value="1" label="The coefficients for the moving average filter"/> </when> </conditional> <param name="window_smoothing" type="float" value="8" label="Window size"/> </when> <when value="mz_alignment"> <param name="alignment_tol" type="text" value="NA" label="tolerance" help="The tolerance to be used when matching the peaks in the unaligned spectra to the reference spectrum. If this is NA, then automatically guess a tolerance from the data."> <sanitizer> <valid initial="string.digits"> <add value="N" /> <add value="A" /> </valid> </sanitizer> </param> <param name="alignment_units" type="select" display="radio" optional="False" label="The units to use for the tolerance."> <option value="ppm" selected="True">ppm</option> <option value="mz">m/z</option> </param> <conditional name="mzalign_ref_type"> <param name="align_reference_datatype" type="select" label="Choose reference"> <option value="align_noref" selected="True">use mean spectrum as reference</option> <option value="align_table" >m/z values from tabular file as reference</option> </param> <when value="align_noref"/> <when value="align_table"> <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to use for alignment. 
Only the m/z values from the tabular file will be kept."/> </when> </conditional> <param name="quantile" type="float" value="0.2" label="quantile" help="The top quantile of reference points (peaks detected via local maxima) to use from the reference spectrum."/> <param name="span" type="float" value="0.75" label="span" help="The smoothing parameter for the local polynomial regression used to determine the warping function."/> </when> <when value="mz_recalibration"> <param name="alignment_tol" type="text" value="NA" label="tolerance" help="The tolerance to be used when matching the peaks in the unaligned spectra to the reference spectrum. If this is NA, then automatically guess a tolerance from the data."> <sanitizer> <valid initial="string.digits"> <add value="N" /> <add value="A" /> </valid> </sanitizer> </param> <param name="alignment_units" type="select" display="radio" optional="False" label="The units to use for the tolerance."> <option value="ppm" selected="True">ppm</option> <option value="mz">m/z</option> </param> <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to use for alignment. 
Only the m/z values from the tabular file will be kept."/> <param name="quantile" type="float" value="0.2" label="quantile" help="The top quantile of reference points (peaks detected via local maxima) to use from the reference spectrum."/> <param name="span" type="float" value="0.75" label="span" help="The smoothing parameter for the local polynomial regression used to determine the warping function."/> </when> <when value="Peak_picking"> <param name="SNR_picking_method" type="float" value="6" label="Signal to noise ratio" help="The minimal signal to noise ratio for peaks to be considered as a valid peak."/> <param name="blocks_picking" type="integer" value="100" label = "Number of blocks" help="Number of blocks in which to divide mass spectrum to calculate noise"/> <param name="window_picking" type="float" value="5" label= "Window size" help="Window width for seeking local maxima"/> <conditional name="methods_for_picking"> <param name="picking_method" type="select" label="Peak picking method"> <option value="adaptive" selected="True">adaptive</option> <option value="mad">mad</option> <option value="simple">simple</option> </param> <when value="adaptive"> <param name="spar_picking" type="float" value="1.0" label="Spar value" help = "Smoothing parameter for the spline smoothing applied to the spectrum in order to decide the cutoffs for throwing away false noise spikes that might occur inside peaks"/> </when> <when value="mad"/> <when value="simple"/> </conditional> </when> <when value="Peak_alignment"> <param name="value_diffalignment" type="float" value="200" label="tolerance" help="Peaks that differ less than this value will be aligned together"/> <param name="units_diffalignment" type="select" display="radio" optional="False" label="units"> <option value="ppm" selected="True">ppm</option> <option value="mz">m/z</option> </param> <conditional name="align_ref_type"> <param name="align_reference_datatype" type="select" label="Choose reference"> <option 
value="align_noref" selected="True">no reference</option> <option value="align_table" >m/z values from tabular file as reference</option> </param> <when value="align_noref"/> <when value="align_table"> <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to use for alignment. Only the m/z values from the tabular file will be kept."/> </when> </conditional> </when> <when value="Peak_filtering"> <param name="frequ_filtering" type="float" value="0.01" max="1" min="0" label="Minimum frequency" help="Peaks that occur in the dataset in lesser proportion than this will be dropped (0.01 --> filtering for 1% of spectra)"/> </when> <when value="Peak_binning"> <expand macro="reading_1_column_mz_tabular" label="A reference to which the peaks are binned." help="Tabular file with m/z features to extract from input file"/> <param name="peakbin_tol" value="NA" type="text" label="The tolerance to be used when matching the m/z features in the dataset to the reference. If this is NA, then automatically guess a resolution from the data." 
> <sanitizer> <valid initial="string.digits"> <add value="N" /> <add value="A" /> </valid> </sanitizer> </param> <param name="peakbin_units" display="radio" type="select" label="The units to use for the tolerance."> <option value="mz" >mz</option> <option value="ppm" selected="True" >ppm</option> </param> <param name="peaks_type" type="select" display="radio" label="Should the peak height or area under the curve be taken as the intensity value?"> <option value="height" selected="True">height</option> <option value="area">area</option> </param> </when> <when value="Mass_binning"> <param name="bin_width" type="float" value="1" label="The width of a bin in m/z or ppm" help="Width must be greater than range of m/z values divided by number of m/z features"/> <param name="bin_units" type="select" display="radio" label="Unit for bin"> <option value="mz" selected="True">mz</option> <option value="ppm">ppm</option> </param> <param name="bin_fun" type="select" display="radio" label="Calculate sum or mean intensity for ions of the same bin"> <option value="mean" selected="True">mean</option> <option value="sum">sum</option> </param> <param name="replace_NA_bin" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="Binning can introduce NAs, should they be replaced with 0"/> <conditional name="mz_range"> <param name="features_filtering" type="select" label="Select m/z options"> <option value="none" selected="True">none</option> <option value="change_mz_range">change m/z range</option> <option value="bin_to_reference">bin m/z to reference</option> </param> <when value="none"/> <when value="change_mz_range"> <param name="min_mz" type="float" value="1" label="Minimum value for m/z"/> <param name="max_mz" type="float" value="10000" label="Maximum value for m/z"/> </when> <when value="bin_to_reference"> <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features as reference for binning. 
Only the m/z values from the tabular file will be kept."/> </when> </conditional> </when> <when value="Transformation"> <conditional name="transf_conditional"> <param name="trans_type" type="select" label="Intensity transformations" help="logarithm base 2 (log2) or squareroot (sqrt)"> <option value="log2" selected="True">log2</option> <option value="sqrt">sqrt</option> </param> <when value="log2"> <param name="replace_NA_trans" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="0 values are set to NA before log2 transformation, after transformation they can be set back to 0"/> </when> <when value="sqrt"/> </conditional> </when> </conditional> </repeat> </inputs> <outputs> <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/> <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "${tool.name} on ${on_string}: QC"/> </outputs> <tests> <test> <expand macro="infile_imzml"/> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Normalization"/> <conditional name="methods_for_normalization"> <param name="normalization_method" value="tic"/> </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Smoothing"/> <conditional name="methods_for_smoothing"> <param name="smoothing_method" value="sgolay"/> </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_picking"/> <conditional name="methods_for_picking"> <param name="picking_method" value="adaptive"/> </conditional> <param name="blocks_picking" value="3"/> <param name="window_picking" value="3"/> <param name="SNR_picking_method" value="3"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" 
value="Peak_alignment"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_filtering"/> <param name="frequ_filtering" value="0.3"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Transformation"/> <conditional name="transf_conditional"> <param name="trans_type" value="sqrt"/> </conditional> </conditional> </repeat> <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/> <output name="outfile_imzml" ftype="imzml" file="preprocessing_results1.imzml.txt" compare="sim_size"> <extra_files type="file" file="preprocessing_results1.imzml" name="imzml" lines_diff="6"/> <extra_files type="file" file="preprocessing_results1.ibd" name="ibd" compare="sim_size"/> </output> </test> <test> <param name="infile" value="3_files_combined.RData" ftype="rdata"/> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_picking"/> <param name="blocks_picking" value="3"/> <param name="window_picking" value="5"/> <param name="SNR_picking_method" value="2"/> <conditional name="methods_for_picking"> <param name="picking_method" value="adaptive"/> </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_alignment"/> </conditional> </repeat> <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/> <output name="outfile_imzml" ftype="imzml" file="preprocessing_results2.imzml.txt" compare="sim_size"> <extra_files type="file" file="preprocessing_results2.imzml" name="imzml" lines_diff="6"/> <extra_files type="file" file="preprocessing_results2.ibd" name="ibd" compare="sim_size"/> </output> </test> <test> <expand macro="infile_analyze75"/> <repeat name="methods"> <conditional name="methods_conditional"> <param 
name="preprocessing_method" value="Normalization"/> <conditional name="methods_for_normalization"> <param name="normalization_method" value="rms"/> </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_picking"/> <param name="blocks_picking" value="20"/> <param name="window_picking" value="5"/> <param name="SNR_picking_method" value="2"/> <conditional name="methods_for_picking"> <param name="picking_method" value="mad"/> </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_alignment"/> </conditional> </repeat> <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/> <output name="outfile_imzml" ftype="imzml" file="preprocessing_results3.imzml.txt" compare="sim_size"> <extra_files type="file" file="preprocessing_results3.imzml" name="imzml" lines_diff="6"/> <extra_files type="file" file="preprocessing_results3.ibd" name="ibd" compare="sim_size"/> </output> </test> <test> <expand macro="infile_analyze75"/> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Normalization"/> <param name="normalization_method" value="tic"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="mz_alignment"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="mz_recalibration"/> <param name="alignment_tol" value="2"/> <param name="alignment_units" value="ppm"/> <param name="mz_tabular" value="inputcalibrantfile2.txt" ftype="tabular"/> <param name="feature_column" value="1"/> <param name="feature_header" value="TRUE"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" 
value="Mass_binning"/> <param name="bin_width" value="0.1"/> <param name="bin_units" value="mz"/> </conditional> </repeat> <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/> <output name="outfile_imzml" ftype="imzml" file="preprocessing_results4.imzml.txt" compare="sim_size"> <extra_files type="file" file="preprocessing_results4.imzml" name="imzml" lines_diff="6"/> <extra_files type="file" file="preprocessing_results4.ibd" name="ibd" compare="sim_size"/> </output> </test> <test> <expand macro="processed_infile_imzml"/> <conditional name="processed_cond"> <param name="processed_file" value="processed"/> <param name="accuracy" value="200"/> <param name="units" value="ppm"/> </conditional> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Transformation"/> <conditional name="transf_conditional"> <param name="trans_type" value="log2"/> </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Baseline_reduction"/> </conditional> </repeat> <output name="QC_overview" file="preprocessing_results5.pdf" compare="sim_size" delta="13000"/> <output name="outfile_imzml" ftype="imzml" file="preprocessing_results5.imzml.txt" compare="sim_size"> <extra_files type="file" file="preprocessing_results5.imzml" name="imzml" lines_diff="6"/> <extra_files type="file" file="preprocessing_results5.ibd" name="ibd" compare="sim_size"/> </output> </test> </tests> <help> <![CDATA[ @CARDINAL_DESCRIPTION@ ----- This tool provides multiple Cardinal functions to preprocess mass spectrometry imaging data. @MSIDATA_INPUT_DESCRIPTION@ - Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates which will be automatically removed after the data is read by the tool. 
@MZ_TABULAR_INPUT_DESCRIPTION@ **Options** - Normalization: normalization of intensities to the total ion current (TIC) or to the root mean square (RMS) - Baseline reduction: removes background intensity generated by chemical noise (common in MALDI datasets) - Smoothing: smoothing of the peaks reduces noise and improves peak detection - m/z alignment: removes small m/z shifts between spectra - Peak picking: relevant peaks are picked while noise peaks are removed (needs peak alignment afterwards) - Peak alignment: only possible after peak picking; m/z inaccuracies are removed by aligning the same peaks to a common m/z value; if no reference is given, the peaks are aligned to the local maxima of the mean spectrum of the current dataset; external reference data from another MSI data file or from a tabular file with m/z values can be used, but then only the m/z values from the reference will be kept - Peak filtering: removes peaks that occur only in a small proportion of pixels. If you are not sure which cutoff to choose, run the quality control tool first and decide according to the number of peaks per m/z plot - Peak binning: extracts peak intensities, either peak height or area under the curve (from a profile dataset), for a list of m/z (reference) values - m/z binning: generates new m/z bins - Transformation: log2 or square root transformation of all intensities; when using log2 transformation, zero intensities will become NA unless "Replace NA with 0" is enabled; remaining NA values can lead to compatibility problems. **Output** - MSI data as continuous imzML file - PDF with key values and four random mass spectra after each processing step ]]> </help> <expand macro="citations"/> </tool>