Mercurial > repos > galaxyp > cardinal_preprocessing
view preprocessing.xml @ 18:83aac7741200 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 91e77c139cb3b7c6d67727dc39140dd79355fa0c
author | galaxyp |
---|---|
date | Thu, 04 Jul 2024 13:38:14 +0000 |
parents | 611d80c0e29d |
children |
line wrap: on
line source
<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
    <description> mass spectrometry imaging preprocessing </description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code">
    <![CDATA[

        @INPUT_LINKING@
        cat '${cardinal_preprocessing}' &&
        Rscript '${cardinal_preprocessing}' &&
        mkdir $outfile_imzml.files_path &&
        ## NOTE: the single pipe in "mv ... | true" is deliberate. It makes a missing output file
        ## non-fatal for that one mv only; "|| true" would also hide a failure of Rscript in this && chain.
        mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
        mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true &&
        echo "imzML file:" > $outfile_imzml &&
        ls -l "$outfile_imzml.files_path" >> $outfile_imzml

    ]]>
    </command>
    <configfiles>
        <configfile name="cardinal_preprocessing"><![CDATA[

################################# load libraries and read file #################

## set CPU: use the number of slots assigned by Galaxy (GALAXY_SLOTS), default = 1
## as.numeric("") is NA, so an unset or malformed variable falls back to a single worker
galaxy_slots = suppressWarnings(as.numeric(Sys.getenv("GALAXY_SLOTS")))
if (is.na(galaxy_slots) || galaxy_slots < 1) {
    number_cpu = 1
}else{
    number_cpu = galaxy_slots
}

library(Cardinal)
library(gridExtra)
library(ggplot2)
library(sva)
library(randomcoloR)

@READING_MSIDATA_FULLY_COMPATIBLE@

## NOTE(review): peak picking and log2 transformation fail on duplicated coordinates;
## no removal happens in this script, presumably the reading macro above takes care of it - confirm

## set variable to False
#set $used_peak_picking = False
#set $used_peak_alignment = False
#set $continuous_format = False

## helper for the QC table: returns min m/z, max m/z (both rounded to 2 digits),
## number of features and number of spectra of a dataset, in this order
qc_numbers_of = function(dataset){
    c(round(min(mz(dataset)), digits=2),
      round(max(mz(dataset)), digits=2),
      nrow(dataset),
      ncol(dataset))
}

if (ncol(msidata)>0 && nrow(msidata) >0){

    ## start QC report
    pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12)
    plot(0,type='n',axes=FALSE,ann=FALSE)
    title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name"))

    ######################### preparations for QC report #################

    ## every preprocessing step appends one column to this table
    QC_numbers= data.frame(inputdata = qc_numbers_of(msidata))
    vectorofactions = "inputdata"

    ## Choose random spectra for QC plots (all spectra if the dataset has fewer than 4)
    random_spectra = sample(pixels(msidata), min(4, ncol(msidata)), replace=FALSE)

    par(oma=c(0,0,2,0))
    print(plot(msidata, pixel=random_spectra, col="black"))
    title("Input spectra", outer=TRUE, line=0)

    ############################### Preprocessing steps ###########################
    ###############################################################################

    #for $method in $methods:

        ############################### Normalization ###########################

        #if str( $method.methods_conditional.preprocessing_method ) == 'Normalization':
            print('Normalization')
            ##normalization

            msidata = normalize(msidata, method="$method.methods_conditional.methods_for_normalization.normalization_method")
            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ############################### QC ###########################

            QC_numbers= cbind(QC_numbers, normalized = qc_numbers_of(msidata))
            vectorofactions = append(vectorofactions, "normalized")
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after normalization", outer=TRUE, line=0)

        ############################### Baseline reduction ###########################

        #elif str( $method.methods_conditional.preprocessing_method ) == 'Baseline_reduction':
            print('Baseline_reduction')
            ##baseline reduction

            msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline, spar=$method.methods_conditional.spar_baseline)
            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ############################### QC ###########################

            QC_numbers= cbind(QC_numbers, baseline = qc_numbers_of(msidata))
            vectorofactions = append(vectorofactions, "baseline red.")
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after baseline reduction", outer=TRUE, line=0)

        ############################### Smoothing ###########################

        #elif str( $method.methods_conditional.preprocessing_method ) == 'Smoothing':
            print('Smoothing')
            ## Smoothing

            #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian':
                print('gaussian smoothing')

                msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian)
                msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay':
                print('sgolay smoothing')

                msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters)
                msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

                ## if selected replace negative intensities with zero
                #if $method.methods_conditional.methods_for_smoothing.replace_negatives:
                    ## convert the spectra into a regular R matrix, clamp negatives to 0 and write it back
                    spectra_df = as.matrix(spectra(msidata))
                    spectra_df[spectra_df<0] = 0
                    spectra(msidata) = spectra_df
                #end if

            #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma':
                print('moving average smoothing')

                msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter)
                msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            #end if

            ############################### QC ###########################

            QC_numbers= cbind(QC_numbers, smoothed = qc_numbers_of(msidata))
            vectorofactions = append(vectorofactions, "smoothed")
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after smoothing", outer=TRUE, line=0)

        ############################### Mz alignment ###########################

        #elif str( $method.methods_conditional.preprocessing_method ) == 'mz_alignment':
            print('m/z alignment')
            ## M/z alignment

            #if str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_table':

                ## reference m/z values are taken from the selected column of the tabular file
                reference_mz = read.delim("$method.methods_conditional.mzalign_ref_type.mz_tabular", header = $method.methods_conditional.mzalign_ref_type.feature_header, stringsAsFactors = FALSE)
                reference_mz = reference_mz[,$method.methods_conditional.mzalign_ref_type.feature_column]

                msidata = mzAlign(msidata, ref=reference_mz, tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span)

            #elif str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_noref':

                msidata = mzAlign(msidata,tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span)

            #end if

            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ############################### QC ###########################

            QC_numbers= cbind(QC_numbers, mz_aligned = qc_numbers_of(msidata))
            vectorofactions = append(vectorofactions, "mz aligned")
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after m/z alignment", outer=TRUE, line=0)

        ############################### Mz recalibration ###########################

        #elif str( $method.methods_conditional.preprocessing_method ) == 'mz_recalibration':
            print('m/z recalibration')
            ## M/z recalibration

            reference_mz = read.delim("$method.methods_conditional.mz_tabular", header = $method.methods_conditional.feature_header, stringsAsFactors = FALSE)
            reference_mz = reference_mz[,$method.methods_conditional.feature_column]

            msidata = mzAlign(msidata, ref=reference_mz, tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span)
            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ## remove the reference peaks data to allow proper peak alignment afterwards
            metadata(featureData(msidata))['reference peaks'] <- NULL

            ############################### QC ###########################

            QC_numbers= cbind(QC_numbers, mz_recal = qc_numbers_of(msidata))
            vectorofactions = append(vectorofactions, "mz recalibrated")
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after m/z recalibration", outer=TRUE, line=0)

        ############################### Peak picking ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking':
            #set $used_peak_picking = True
            print('Peak_picking')
            ## Peakpicking

            #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive':
                print('adaptive peakpicking')

                msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, spar=$method.methods_conditional.methods_for_picking.spar_picking)

            #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'mad':
                print('mad peakpicking')

                msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method)

            #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'simple':
                print('simple peakpicking')

                msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method)

            #end if

            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ############################### QC ###########################

            QC_numbers= cbind(QC_numbers, picked = qc_numbers_of(msidata))
            vectorofactions = append(vectorofactions, "picked")
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after peak picking", outer=TRUE, line=0)

        ############################### Peak alignment ###########################

        #elif str( $method.methods_conditional.preprocessing_method ) == 'Peak_alignment':
            #set $used_peak_alignment = True
            print('Peak_alignment')
            ## Peakalignment

            #if str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_table':

                align_reference_table = read.delim("$method.methods_conditional.align_ref_type.mz_tabular", header = $method.methods_conditional.align_ref_type.feature_header, stringsAsFactors = FALSE)
                align_reference_column = align_reference_table[,$method.methods_conditional.align_ref_type.feature_column]
                ## keep only reference values inside the m/z range of the dataset
                align_peak_reference = as.numeric(align_reference_column[align_reference_column>=min(mz(msidata)) & align_reference_column<=max(mz(msidata))])
                if (length(align_peak_reference) == 0)
                    {align_peak_reference = 0}

                msidata = peakAlign(msidata,tolerance =$method.methods_conditional.value_diffalignment, units = "$method.methods_conditional.units_diffalignment", ref=align_peak_reference)

            #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_noref':

                msidata = peakAlign(msidata,tolerance =$method.methods_conditional.value_diffalignment, units = "$method.methods_conditional.units_diffalignment")

            #end if

            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ############################### QC ###########################

            QC_numbers= cbind(QC_numbers, aligned = qc_numbers_of(msidata))
            vectorofactions = append(vectorofactions, "aligned")
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after alignment", outer=TRUE, line=0)

        ############################### Peak filtering ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_filtering':
            print('Peak_filtering')

            msidata = peakFilter(msidata, freq.min = $method.methods_conditional.frequ_filtering)
            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ############################### QC ###########################

            QC_numbers= cbind(QC_numbers, filtered = qc_numbers_of(msidata))
            vectorofactions = append(vectorofactions, "filtered")
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after filtering", outer=TRUE, line=0)

        ############################### Peak binning ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_binning':
            print('Peak_binning')

            ## reading reference file
            reference_table = read.delim("$method.methods_conditional.mz_tabular", header = $method.methods_conditional.feature_header, stringsAsFactors = FALSE)
            reference_column = reference_table[,$method.methods_conditional.feature_column]
            ## keep only reference values strictly inside the m/z range of the dataset
            peak_reference = reference_column[reference_column>min(mz(msidata)) & reference_column<max(mz(msidata))]

            msidata = peakBin(msidata, ref = peak_reference, tolerance = $method.methods_conditional.peakbin_tol, units = "$method.methods_conditional.peakbin_units", type="$method.methods_conditional.peaks_type")
            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ############################### QC ###########################

            QC_numbers= cbind(QC_numbers, peak_binned = qc_numbers_of(msidata))
            vectorofactions = append(vectorofactions, "peak binned")
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after peak binning", outer=TRUE, line=0)

        ############################### Mass binning ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'Mass_binning':
            print('mass binning')

            #if str($method.methods_conditional.mz_range.features_filtering) == 'change_mz_range':

                msidata = mzBin(msidata, resolution=$method.methods_conditional.bin_width, from=$method.methods_conditional.mz_range.min_mz, to=$method.methods_conditional.mz_range.max_mz, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun")

            #elif str($method.methods_conditional.mz_range.features_filtering) == 'none':

                msidata = mzBin(msidata, resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun")

            #elif str($method.methods_conditional.mz_range.features_filtering) == 'bin_to_reference':

                bin_reference_mz = read.delim("$method.methods_conditional.mz_range.mz_tabular", header = $method.methods_conditional.mz_range.feature_header, stringsAsFactors = FALSE)
                bin_reference_mz = bin_reference_mz[,$method.methods_conditional.mz_range.feature_column]

                msidata = mzBin(msidata, resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun", ref=bin_reference_mz)

            #end if

            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))

            ## optional: replace NA with 0
            #if $method.methods_conditional.replace_NA_bin:
                ## count and replace NAs
                print(paste0("Number of NA that were set to zero after binning:",sum(is.na(spectra(msidata)))))
                spectra(msidata)[is.na(spectra(msidata))] = 0
            #end if

            ############################### QC ###########################

            QC_numbers= cbind(QC_numbers, reduced = qc_numbers_of(msidata))
            vectorofactions = append(vectorofactions, "reduced")
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after m/z binning", outer=TRUE, line=0)

        ############################### Transformation ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'Transformation':
            print('Transformation')

            ## processed (sparse) data is converted to the continuous class before the intensities are changed
            if (inherits(msidata, "MSProcessedImagingExperiment")){
                msidata = as(msidata, "MSContinuousImagingExperiment")
            }

            #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2':
                print('log2 transformation')

                ## replace 0 with NA to prevent Inf
                ## convert into R matrix
                spectra_df = as.matrix(spectra(msidata))
                ## count the zeros before replacing them, so NAs already present are not reported as converted
                zero_count = sum(spectra_df == 0, na.rm=TRUE)
                spectra_df[spectra_df ==0] = NA
                print(paste0("Number of 0 which were converted into NA:",zero_count))
                spectra(msidata) = spectra_df

                ## log transformation
                spectra(msidata) = log2(spectra(msidata))

                ## optional: replace NA with 0
                #if $method.methods_conditional.transf_conditional.replace_NA_trans:
                    spectra(msidata)[is.na(spectra(msidata))] = 0
                #end if

            #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt':
                print('squareroot transformation')

                spectra(msidata) = sqrt(spectra(msidata))

            #end if

            ############################### QC ###########################

            QC_numbers= cbind(QC_numbers, transformed = qc_numbers_of(msidata))
            vectorofactions = append(vectorofactions, "transformed")
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after transformation", outer=TRUE, line=0)

        ############################### ComBat batch correction ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'ComBat_batch_correction':
            print('ComBat batch correction of centroided data')

            ## load annotation tabular and define batch and condition column
            annotation = read.delim("$method.methods_conditional.annotation_file", header=$method.methods_conditional.feature_header, sep="\t")
            annotation_x = annotation[,$method.methods_conditional.x_column]
            annotation_y = annotation[,$method.methods_conditional.y_column]
            batch = annotation[,$method.methods_conditional.batch_column]
            condition = annotation[,$method.methods_conditional.condition_column]

            ### stop if not enough batches provided: ComBat needs at least 2 different batch levels
            tryCatch(
                {
                    if (length(unique(batch)) < 2)
                    {
                        stop(call.=FALSE)
                    }
                },
                error=function(cond) {
                    ## in case user provided an annotation tabular with less than two batches
                    message("Error in annotation tabular")
                    message("Possible problems: Annotation tabular file has not enough batch levels - to perform ComBat at least 2 batches and 2 pixels per batch are necessary")
                    stop(call.=FALSE)
                }
            )

            ## get intensity matrix from imzml file
            intensity_matrix = as.matrix(iData(msidata))
            mz_names = paste0("mz_", mz(msidata))
            pixel_names = paste0("xy_", msidata@elementMetadata@coord@listData[["x"]], "_", msidata@elementMetadata@coord@listData[["y"]])
            rownames(intensity_matrix) = mz_names
            colnames(intensity_matrix) = pixel_names

            ## pixel names of the annotation rows, they define the column order expected by the batch vector
            col_order = paste0("xy_", annotation_x, "_", annotation_y)

            ### stop if pixel/sample names (columns) in intensity matrix from imzml file don't match samples names (rows) in annotation tabular file
            ### every pixel must be annotated exactly once, otherwise reordering and writing back the corrected data is not possible
            tryCatch(
                {
                    if (anyDuplicated(col_order) > 0 || !setequal(colnames(intensity_matrix), col_order))
                    {
                        stop(call.=FALSE)
                    }
                },
                error=function(cond) {
                    ## in case pixel names (columns) from the imzml file don't match the pixel names in the annotation tabular file
                    message("Error in annotation tabular")
                    message("Possible problems: Annotation tabular file does not contain the correct pixel names (columns) from the imzml file)")
                    stop(call.=FALSE)
                }
            )

            ## reorder columns of intensity matrix to row order of batch column
            rownames(annotation) = col_order
            intensity_matrix = intensity_matrix[, col_order]
            print("columns have been ordered to annotation row order")

            ## execution of ComBat algorithm from sva package
            combat_data = ComBat(dat = intensity_matrix, batch = batch, mod = NULL, par.prior = TRUE, prior.plots = FALSE)
            print("Combat has been executed")

            ## change intensity data of loaded imzml file after combat has been performed
            ## the corrected matrix is in annotation row order, so it is put back into the pixel order of the dataset first
            iData(msidata) = as.matrix(combat_data[, pixel_names])

            ############################### QC ###########################

            QC_numbers= cbind(QC_numbers, batch_corrected = qc_numbers_of(msidata))
            vectorofactions = append(vectorofactions, "batch_corrected")
            print(plot(msidata, pixel=random_spectra, col="black"))
            title("Spectra after ComBat batch correction", outer=TRUE, line=0)

            ## PCA plot function and execution
            ## both data frames are in annotation row order and therefore match batch and condition
            combat_data = as.data.frame(combat_data)
            intensity_data = as.data.frame(intensity_matrix)

            ## PCA function: plots PC1 against PC2 of the pixels, colored by batch and shaped by condition
            plot_PCA = function(input_data, condition, batch, title, color){
                data <- input_data
                pca_data <- prcomp(t(data[, seq_len(ncol(input_data))]))
                pca_sdev <- pca_data[["sdev"]]
                pca_data_perc <- round(100 * pca_sdev^2 / sum(pca_sdev^2), 1)
                pca_components <- pca_data[["x"]]
                df_pca_data <- data.frame(PC1 = pca_components[, 1], PC2 = pca_components[, 2], sample = colnames(input_data), condition = condition)
                ggplot(df_pca_data, aes(PC1, PC2, color = as.factor(batch), shape = as.factor(condition))) +
                    ggtitle(title) +
                    geom_point(size = 4) +
                    stat_ellipse(aes(PC1, PC2, color = as.factor(batch), group = as.factor(batch)), type = "norm")+
                    scale_color_manual(values=color) +
                    theme_bw() +
                    theme(legend.position = "bottom", legend.box="vertical", plot.title = element_text(size = 12, hjust = 0.5), axis.title = element_text(size = 12), axis.text = element_text(size = 12, color = "black")) +
                    labs(x=paste0("PC1 (",pca_data_perc[1],")"), y=paste0("PC2 (",pca_data_perc[2],")")) +
                    labs(color = "Batches", shape = "Conditions")}

            ## define colors
            color_pal = distinctColorPalette(length(levels(as.factor(batch))))

            ## execution of PCA plots
            PCA_bc = plot_PCA(intensity_data, condition, batch, "before batch correction", color_pal)
            PCA_ac = plot_PCA(combat_data, condition, batch, "batch corrected", color_pal)
            print(PCA_bc)
            print(PCA_ac)

        #end if
    #end for

    ############# Outputs: RData, imzml and QC report #############
    ################################################################################

    ## save msidata as imzML file, will only work if there is at least 1 m/z left

    if (nrow(msidata) > 0){
        ## make sure that coordinates are integers
        coord(msidata)\$y = as.integer(coord(msidata)\$y)
        coord(msidata)\$x = as.integer(coord(msidata)\$x)
        ## only continuous files can currently be exported
        msidata = as(msidata, "MSContinuousImagingExperiment")
        writeImzML(msidata, "out")
    }

    ## empty page that receives the QC table
    plot(0,type='n',axes=FALSE,ann=FALSE)
rownames(QC_numbers) = c("min m/z", "max mz", "# features", "# spectra") grid.table(t(QC_numbers)) dev.off() }else{ print("inputfile has no intensities > 0") } ]]></configfile> </configfiles> <inputs> <expand macro="reading_msidata"/> <repeat name="methods" title="Preprocessing" min="1" max="50"> <conditional name="methods_conditional"> <param name="preprocessing_method" type="select" label="Preprocessing methods"> <option value="Normalization" selected="True">Intensity Normalization</option> <option value="Baseline_reduction">Baseline Reduction</option> <option value="Smoothing">Peak smoothing</option> <option value="mz_alignment">m/z alignment</option> <option value="mz_recalibration">m/z recalibration</option> <option value="Peak_picking">Peak picking</option> <option value="Peak_alignment">Peak alignment</option> <option value="Peak_filtering">Peak filtering</option> <option value="Peak_binning">Peak binning to reference peaks</option> <option value="Mass_binning">m/z binning</option> <option value="Transformation">Transformation</option> <option value="ComBat_batch_correction">ComBat batch correction of centroided data</option> </param> <when value="Normalization"> <conditional name="methods_for_normalization"> <param name="normalization_method" type="select" label="Normalization method"> <option value="tic" selected="True">TIC</option> <option value="rms">RMS</option> </param> <when value="tic"/> <when value="rms"/> </conditional> </when> <when value="Baseline_reduction"> <param name="blocks_baseline" type="integer" value="500" label="Blocks"/> <param name="spar_baseline" type="float" value="1.0" label="Spar value" help="Smoothing parameter for the spline smoothing applied to the spectrum in order to decide the cutoffs for throwing away false noise spikes that might occur inside peaks"/> </when> <when value="Smoothing"> <conditional name="methods_for_smoothing"> <param name="smoothing_method" type="select" label="Smoothing method"> <option value="gaussian" 
selected="True">gaussian</option> <option value="sgolay">Savitsky-Golay</option> <option value="ma">moving average</option> </param> <when value="gaussian"> <param name="sd_gaussian" type="float" value="2" label="The standard deviation for the Gaussian kernel. Default = window/4"/> </when> <when value="sgolay"> <param name="order_of_filters" type="integer" value="3" label="The order of the smoothing filter, must be smaller than window size"/> <param name="replace_negatives" type="boolean" label="Replace negative intensities with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="Savitzky golay smoothing can introduce negative intensity values it is recommended to replace them with zero"/> </when> <when value="ma"> <param name="coefficients_ma_filter" type="float" value="1" label="The coefficients for the moving average filter"/> </when> </conditional> <param name="window_smoothing" type="float" value="8" label="Window size"/> </when> <when value="mz_alignment"> <param name="alignment_tol" type="text" value="NA" label="tolerance" help="The tolerance to be used when matching the peaks in the unaligned spectra to the reference spectrum. 
If this is NA, then automatically guess a tolerance from the data."> <sanitizer> <valid initial="string.digits"> <add value="N" /> <add value="A" /> </valid> </sanitizer> </param> <param name="alignment_units" type="select" display="radio" optional="False" label="The units to use for the tolerance."> <option value="ppm" selected="True">ppm</option> <option value="mz">m/z</option> </param> <conditional name="mzalign_ref_type"> <param name="align_reference_datatype" type="select" label="Choose reference"> <option value="align_noref" selected="True">use mean spectrum as reference</option> <option value="align_table" >m/z values from tabular file as reference</option> </param> <when value="align_noref"/> <when value="align_table"> <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to use for alignment. Only the m/z values from the tabular file will be kept."/> </when> </conditional> <param name="quantile" type="float" value="0.2" label="quantile" help="The top quantile of reference points (peaks detected via local maxima) to use from the reference spectrum."/> <param name="span" type="float" value="0.75" label="span" help="The smoothing parameter for the local polynomial regression used to determine the warping function."/> </when> <when value="mz_recalibration"> <param name="alignment_tol" type="text" value="NA" label="tolerance" help="The tolerance to be used when matching the peaks in the unaligned spectra to the reference spectrum. If this is NA, then automatically guess a tolerance from the data."> <sanitizer> <valid initial="string.digits"> <add value="N" /> <add value="A" /> </valid> </sanitizer> </param> <param name="alignment_units" type="select" display="radio" optional="False" label="The units to use for the tolerance."> <option value="ppm" selected="True">ppm</option> <option value="mz">m/z</option> </param> <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to use for alignment. 
Only the m/z values from the tabular file will be kept."/> <param name="quantile" type="float" value="0.2" label="quantile" help="The top quantile of reference points (peaks detected via local maxima) to use from the reference spectrum."/> <param name="span" type="float" value="0.75" label="span" help="The smoothing parameter for the local polynomial regression used to determine the warping function."/> </when> <when value="Peak_picking"> <param name="SNR_picking_method" type="float" value="6" label="Signal to noise ratio" help="The minimal signal to noise ratio for peaks to be considered as a valid peak."/> <param name="blocks_picking" type="integer" value="100" label = "Number of blocks" help="Number of blocks in which to divide mass spectrum to calculate noise"/> <param name="window_picking" type="float" value="5" label= "Window size" help="Window width for seeking local maxima"/> <conditional name="methods_for_picking"> <param name="picking_method" type="select" label="Peak picking method"> <option value="adaptive" selected="True">adaptive</option> <option value="mad">mad</option> <option value="simple">simple</option> </param> <when value="adaptive"> <param name="spar_picking" type="float" value="1.0" label="Spar value" help = "Smoothing parameter for the spline smoothing applied to the spectrum in order to decide the cutoffs for throwing away false noise spikes that might occur inside peaks"/> </when> <when value="mad"/> <when value="simple"/> </conditional> </when> <when value="Peak_alignment"> <param name="value_diffalignment" type="float" value="200" label="tolerance" help="Peaks that differ less than this value will be aligned together"/> <param name="units_diffalignment" type="select" display="radio" optional="False" label="units"> <option value="ppm" selected="True">ppm</option> <option value="mz">m/z</option> </param> <conditional name="align_ref_type"> <param name="align_reference_datatype" type="select" label="Choose reference"> <option 
value="align_noref" selected="True">no reference</option> <option value="align_table" >m/z values from tabular file as reference</option> </param> <when value="align_noref"/> <when value="align_table"> <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to use for alignment. Only the m/z values from the tabular file will be kept."/> </when> </conditional> </when> <when value="Peak_filtering"> <param name="frequ_filtering" type="float" value="0.01" max="1" min="0" label="Minimum frequency" help="Peaks that occur in the dataset in lesser proportion than this will be dropped (0.01 --> filtering for 1% of spectra)"/> </when> <when value="Peak_binning"> <expand macro="reading_1_column_mz_tabular" label="A reference to which the peaks are binned." help="Tabular file with m/z features to extract from input file"/> <param name="peakbin_tol" value="NA" type="text" label="The tolerance to be used when matching the m/z features in the dataset to the reference. If this is NA, then automatically guess a resolution from the data." 
> <sanitizer> <valid initial="string.digits"> <add value="N" /> <add value="A" /> </valid> </sanitizer> </param> <param name="peakbin_units" display="radio" type="select" label="The units to use for the tolerance."> <option value="mz" >mz</option> <option value="ppm" selected="True" >ppm</option> </param> <param name="peaks_type" type="select" display="radio" label="Should the peak height or area under the curve be taken as the intensity value?"> <option value="height" selected="True">height</option> <option value="area">area</option> </param> </when> <when value="Mass_binning"> <param name="bin_width" type="float" value="1" label="The width of a bin in m/z or ppm" help="Width must be greater than range of m/z values divided by number of m/z features"/> <param name="bin_units" type="select" display="radio" label="Unit for bin"> <option value="mz" selected="True">mz</option> <option value="ppm">ppm</option> </param> <param name="bin_fun" type="select" display="radio" label="Calculate sum or mean intensity for ions of the same bin"> <option value="mean" selected="True">mean</option> <option value="sum">sum</option> </param> <param name="replace_NA_bin" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="Binning can introduce NAs, should they be replaced with 0"/> <conditional name="mz_range"> <param name="features_filtering" type="select" label="Select m/z options"> <option value="none" selected="True">none</option> <option value="change_mz_range">change m/z range</option> <option value="bin_to_reference">bin m/z to reference</option> </param> <when value="none"/> <when value="change_mz_range"> <param name="min_mz" type="float" value="1" label="Minimum value for m/z"/> <param name="max_mz" type="float" value="10000" label="Maximum value for m/z"/> </when> <when value="bin_to_reference"> <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features as reference for binning. 
Only the m/z values from the tabular file will be kept."/> </when> </conditional> </when> <when value="Transformation"> <conditional name="transf_conditional"> <param name="trans_type" type="select" label="Intensity transformations" help="logarithm base 2 (log2) or square root (sqrt)"> <option value="log2" selected="True">log2</option> <option value="sqrt">sqrt</option> </param> <when value="log2"> <param name="replace_NA_trans" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="0 values are set to NA before log2 transformation; after transformation they can be set back to 0"/> </when> <when value="sqrt"/> </conditional> </when> <when value="ComBat_batch_correction"> <param name="annotation_file" type="data" format="tabular" label="Annotation file that contains the pixel x and y coordinates, the batch identifier, and the condition annotation for each spectrum." help="Annotation tabular file that contains the batch identifier for each spectrum in one column."/> <param name="x_column" type="data_column" data_ref="annotation_file" label="X coordinates" help="Column with x coordinates of pixels."/> <param name="y_column" type="data_column" data_ref="annotation_file" label="Y coordinates" help="Column with y coordinates of pixels."/> <param name="batch_column" type="data_column" data_ref="annotation_file" label="Batch column" help="The column that contains the batch identifier for each spectrum."/> <param name="condition_column" type="data_column" data_ref="annotation_file" label="Condition column" help="The column that contains the condition annotation for each spectrum. Typically these are the groups you want to compare. 
If not applicable, the batch column can be selected again as this information is only used for the QC plot."/> <param name="feature_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/> </when> </conditional> </repeat> </inputs> <outputs> <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/> <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "${tool.name} on ${on_string}: QC"/> </outputs> <tests> <test> <expand macro="infile_imzml"/> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Normalization"/> <conditional name="methods_for_normalization"> <param name="normalization_method" value="tic"/> </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Smoothing"/> <conditional name="methods_for_smoothing"> <param name="smoothing_method" value="sgolay"/> </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_picking"/> <conditional name="methods_for_picking"> <param name="picking_method" value="adaptive"/> </conditional> <param name="blocks_picking" value="3"/> <param name="window_picking" value="3"/> <param name="SNR_picking_method" value="3"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_alignment"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_filtering"/> <param name="frequ_filtering" value="0.3"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Transformation"/> <conditional name="transf_conditional"> <param name="trans_type" value="sqrt"/> 
</conditional> </conditional> </repeat> <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/> <output name="outfile_imzml" ftype="imzml" file="preprocessing_results1.imzml.txt" compare="sim_size"> <extra_files type="file" file="preprocessing_results1.imzml" name="imzml" lines_diff="6"/> <extra_files type="file" file="preprocessing_results1.ibd" name="ibd" compare="sim_size"/> </output> </test> <test> <param name="infile" value="3_files_combined.RData" ftype="rdata"/> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_picking"/> <param name="blocks_picking" value="3"/> <param name="window_picking" value="5"/> <param name="SNR_picking_method" value="2"/> <conditional name="methods_for_picking"> <param name="picking_method" value="adaptive"/> </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_alignment"/> </conditional> </repeat> <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/> <output name="outfile_imzml" ftype="imzml" file="preprocessing_results2.imzml.txt" compare="sim_size"> <extra_files type="file" file="preprocessing_results2.imzml" name="imzml" lines_diff="6"/> <extra_files type="file" file="preprocessing_results2.ibd" name="ibd" compare="sim_size"/> </output> </test> <test> <expand macro="infile_analyze75"/> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Normalization"/> <conditional name="methods_for_normalization"> <param name="normalization_method" value="rms"/> </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_picking"/> <param name="blocks_picking" value="20"/> <param name="window_picking" value="5"/> <param name="SNR_picking_method" value="2"/> <conditional 
name="methods_for_picking"> <param name="picking_method" value="mad"/> </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_alignment"/> </conditional> </repeat> <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/> <output name="outfile_imzml" ftype="imzml" file="preprocessing_results3.imzml.txt" compare="sim_size"> <extra_files type="file" file="preprocessing_results3.imzml" name="imzml" lines_diff="6"/> <extra_files type="file" file="preprocessing_results3.ibd" name="ibd" compare="sim_size"/> </output> </test> <test> <expand macro="infile_analyze75"/> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Normalization"/> <param name="normalization_method" value="tic"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="mz_alignment"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="mz_recalibration"/> <param name="alignment_tol" value="2"/> <param name="alignment_units" value="ppm"/> <param name="mz_tabular" value="inputcalibrantfile2.txt" ftype="tabular"/> <param name="feature_column" value="1"/> <param name="feature_header" value="FALSE"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Mass_binning"/> <param name="bin_width" value="0.1"/> <param name="bin_units" value="mz"/> </conditional> </repeat> <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/> <output name="outfile_imzml" ftype="imzml" file="preprocessing_results4.imzml.txt" compare="sim_size"> <extra_files type="file" file="preprocessing_results4.imzml" name="imzml" lines_diff="6"/> <extra_files type="file" file="preprocessing_results4.ibd" name="ibd" 
compare="sim_size"/> </output> </test> <test> <expand macro="processed_infile_imzml"/> <conditional name="processed_cond"> <param name="processed_file" value="processed"/> <param name="accuracy" value="200"/> <param name="units" value="ppm"/> </conditional> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Transformation"/> <conditional name="transf_conditional"> <param name="trans_type" value="log2"/> </conditional> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Baseline_reduction"/> </conditional> </repeat> <output name="QC_overview" file="preprocessing_results5.pdf" compare="sim_size" delta="13000"/> <output name="outfile_imzml" ftype="imzml" file="preprocessing_results5.imzml.txt" compare="sim_size"> <extra_files type="file" file="preprocessing_results5.imzml" name="imzml" lines_diff="6"/> <extra_files type="file" file="preprocessing_results5.ibd" name="ibd" compare="sim_size"/> </output> </test> <test> <param name="infile" value="" ftype="imzml"> <composite_data value="Combat_40pixel.imzML" /> <composite_data value="Combat_40pixel.ibd"/> </param> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="ComBat_batch_correction"/> <param name="annotation_file" value="annotation_40pixel.tabular" ftype="tabular"/> <param name="feature_header" value="TRUE"/> <param name="x_column" value="2"/> <param name="y_column" value="3"/> <param name="batch_column" value="4"/> <param name="condition_column" value="6"/> </conditional> </repeat> <output name="QC_overview" file="preprocessing_results_combat_40pixel.pdf" compare="sim_size"/> <output name="outfile_imzml" ftype="imzml" file="preprocessing_results_combat_40pixel.imzml.txt" compare="sim_size"> <extra_files type="file" file="preprocessing_results_combat_40pixel.imzml" name="imzml" lines_diff="6"/> <extra_files type="file" 
file="preprocessing_results_combat_40pixel.ibd" name="ibd" compare="sim_size"/> </output> </test> <test> <expand macro="processed_infile_imzml"/> <conditional name="processed_cond"> <param name="processed_file" value="processed"/> <param name="accuracy" value="200"/> <param name="units" value="ppm"/> </conditional> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="ComBat_batch_correction"/> <param name="annotation_file" value="Example_processed_ComBat_annotation.tabular" ftype="tabular"/> <param name="feature_header" value="TRUE"/> <param name="x_column" value="2"/> <param name="y_column" value="3"/> <param name="batch_column" value="4"/> <param name="condition_column" value="5"/> </conditional> </repeat> <output name="QC_overview" file="ComBat_results_Example_processed_file.pdf" compare="sim_size"/> <output name="outfile_imzml" ftype="imzml" file="ComBat_results_Example_processed_file.imzml.txt" compare="sim_size"> <extra_files type="file" file="ComBat_results_Example_processed_file.imzml" name="imzml" lines_diff="6"/> <extra_files type="file" file="ComBat_results_Example_processed_file.ibd" name="ibd" compare="sim_size"/> </output> </test> </tests> <help> <![CDATA[ @CARDINAL_DESCRIPTION@ ----- This tool provides multiple Cardinal functions to preprocess mass spectrometry imaging data. @MSIDATA_INPUT_DESCRIPTION@ - Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates which will be automatically removed after the data is read by the tool. 
@MZ_TABULAR_INPUT_DESCRIPTION@ **Options** - Normalization: normalization of intensities to total ion current (TIC) or to root-mean-square (RMS) - Baseline reduction: baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets) - Smoothing: Smoothing of the peaks reduces noise and improves peak detection - m/z alignment: removes small m/z shifts between spectra - Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards) - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value; if no reference is given the peaks are aligned to the local maxima of the mean spectrum of the current dataset; external reference data can be used from another MSI data file or a tabular file with m/z values, but then only the m/z from the reference will be kept - Peak filtering: removes peaks that occur only in a small proportion of pixels. If unsure which cutoff to choose, run the quality control tool first and decide according to the number of peaks per m/z plot - Peak binning: extracts peak intensities, either peak height or area under the curve (from a profile dataset), for a list of m/z (reference) values - m/z binning: generates new m/z bins - Transformation: log2 or square root transformation of all intensities; when using log2 transformation, zero intensities will become NA, which can lead to compatibility problems. - ComBat batch correction: corrects the intensity values of picked m/z features according to batches given in an annotation table. For now, it can only be applied to m/z features after peak picking (=centroided data). The annotation table needs to contain the x and y coordinates for each pixel and a batch identifier (e.g. TMA_1, TMA_2, TMA_3). Additionally, a condition column can be provided, which is only used for the PCA plots in the PDF file. 
Example of annotation file for ComBat batch correction: :: x_coord y_coord batch_identifier condition 10 29 TMA_1 A 22 14 TMA_1 B 22 27 TMA_2 A 23 7 TMA_2 B 29 45 TMA_3 A 33 41 TMA_3 B ... ... **Output** - MSI data as continuous imzML file - pdf with key values and four random mass spectra after each processing step ]]> </help> <expand macro="citations"/> </tool>