view msi_preprocessing.xml @ 9:4d5578b57a77 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 5feaf3d0e0da8cef1241fecc1f4d6f81324594e6
author galaxyp
date Wed, 22 Aug 2018 13:43:04 -0400
parents d77c5228fd1a
children df8d7f6f210b
line wrap: on
line source

<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.6">
    <description>
        mass spectrometry imaging preprocessing
    </description>
    <requirements>
        <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
        <requirement type="package" version="2.2.1">r-gridextra</requirement>
        <requirement type="package" version="0.20-35">r-lattice</requirement>
        <requirement type="package" version="2.2.1">r-ggplot2</requirement>
    </requirements>
    <command detect_errors="exit_code">
    <![CDATA[

        ## Link the input dataset under the fixed name "infile" in the working
        ## directory; the R script below reads it under that name.
        #if $infile.ext == 'imzml'
            ## imzML input: link the imzml and ibd parts of the dataset
            ln -s '${infile.extra_files_path}/imzml' infile.imzML &&
            ln -s '${infile.extra_files_path}/ibd' infile.ibd &&
        #elif $infile.ext == 'analyze75'
            ## Analyze 7.5 input: link the hdr, img and t2m parts of the dataset
            ln -s '${infile.extra_files_path}/hdr' infile.hdr &&
            ln -s '${infile.extra_files_path}/img' infile.img &&
            ln -s '${infile.extra_files_path}/t2m' infile.t2m &&
        #else
            ## any other datatype is linked as an RData file
            ln -s '$infile' infile.RData &&
        #end if
        ## print the generated R script to stdout, then execute it
        cat '${cardinal_preprocessing}' &&
        Rscript '${cardinal_preprocessing}'

    ]]>
    </command>
    <configfiles>
        <configfile name="cardinal_preprocessing"><![CDATA[

################################# load libraries and read file #################

library(Cardinal)
library(gridExtra)
library(lattice)
library(ggplot2)

## Helper: load an RData file and return the object stored in it.
## The file is loaded into the environment of the function call, so the stored
## object is found by listing that environment without the argument itself.
## Defined once here; it is used for RData input files and for RData reference files.
loadRData <- function(fileName){
    load(fileName)
    get(ls()[ls() != "fileName"])
}

## Read the input with the reader that matches its datatype
#if $infile.ext == 'imzml'
    #if str($processed_cond.processed_file) == "processed":
        ## processed imzML: m/z values are binned with the given accuracy and unit
        msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units")
    #else
        msidata <- readImzML('infile', attach.only=TRUE)
    #end if
#elif $infile.ext == 'analyze75'
    msidata <- readAnalyze('infile', attach.only=TRUE)
#else
    msidata <- loadRData('infile.RData')
#end if

print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[]))))

## everything below only runs when at least one intensity is above zero
if (any(spectra(msidata)[]>0, na.rm=TRUE)){
    ######################### preparations for QC report #################

        ## first column of the QC table describes the unprocessed input; rows are:
        ## min m/z, max m/z, number of features, median number of intensities > 0
        ## per spectrum, median intensity
        QC_numbers = data.frame(inputdata = c(
            round(min(mz(msidata)), digits=2),
            round(max(mz(msidata)), digits=2),
            length(features(msidata)),
            median(colSums(spectra(msidata)[]>0, na.rm=TRUE)),
            round(median(spectra(msidata)[],na.rm=TRUE), digits=2)))
        vectorofactions = "inputdata"

    ############################### Preprocessing steps ###########################
    ###############################################################################

    ## the preprocessing steps are applied in the order in which they were configured
    #for $method in $methods:

    ############################### Normalization ###########################

        #if str( $method.methods_conditional.preprocessing_method ) == 'Normalization':
            print('Normalization')
            ## normalization with the "tic" method

            msidata = normalize(msidata, method="tic")

            ############################### QC ###########################

                ## QC numbers after this step, in the row order of QC_numbers:
                ## min m/z, max m/z, number of features, median number of
                ## intensities > 0 per spectrum, median intensity
                maxfeatures = length(features(msidata))
                medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
                medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
                minmz = round(min(mz(msidata)), digits=2)
                maxmz = round(max(mz(msidata)), digits=2)
                normalized = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
                ## the variable name becomes the column name in the QC table
                QC_numbers= cbind(QC_numbers, normalized)
                vectorofactions = append(vectorofactions, "normalized")

    ############################### Baseline reduction ###########################

        #elif str( $method.methods_conditional.preprocessing_method ) == 'Baseline_reduction':
            print('Baseline_reduction')
            ## baseline reduction with the median method and the configured number of blocks
            msidata = reduceBaseline(msidata,
                                     method="median",
                                     blocks=$method.methods_conditional.blocks_baseline)

            ############################### QC ###########################

                ## QC numbers after this step (min m/z, max m/z, number of features,
                ## median number of intensities > 0 per spectrum, median intensity);
                ## the variable name becomes the column name in the QC table
                baseline = c(
                    round(min(mz(msidata)), digits=2),
                    round(max(mz(msidata)), digits=2),
                    length(features(msidata)),
                    median(colSums(spectra(msidata)[]>0, na.rm=TRUE)),
                    round(median(spectra(msidata)[], na.rm=TRUE), digits=2))
                QC_numbers = cbind(QC_numbers, baseline)
                vectorofactions = c(vectorofactions, "baseline red.")

    ############################### Smoothing ###########################

        #elif str( $method.methods_conditional.preprocessing_method ) == 'Smoothing':
            print('Smoothing')
            ## Smoothing: all methods share the window size and take one
            ## method specific parameter (sd, order or coef)

            #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian':
                print('gaussian smoothing')

                msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian)

            #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay':
                print('sgolay smoothing')

                msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters)

            #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma':
                ## log the method that is really applied in this branch
                print('moving average smoothing')

                msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter)

            #end if

            ############################### QC ###########################

                ## QC numbers after this step, in the row order of QC_numbers:
                ## min m/z, max m/z, number of features, median number of
                ## intensities > 0 per spectrum, median intensity
                maxfeatures = length(features(msidata))
                medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
                medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
                minmz = round(min(mz(msidata)), digits=2)
                maxmz = round(max(mz(msidata)), digits=2)
                smoothed = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
                ## the variable name becomes the column name in the QC table
                QC_numbers= cbind(QC_numbers, smoothed)
                vectorofactions = append(vectorofactions, "smoothed")

    ############################### Peak picking ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking':
            print('Peak_picking')

            ## pixels with duplicated coordinates are dropped first, otherwise peak picking will fail
            duplicated_pixels = duplicated(coord(msidata))
            print(paste0(sum(duplicated_pixels), " coordinates were removed"))
            msidata <- msidata[,!duplicated_pixels]

            ## window, blocks and SNR are shared by all methods;
            ## adaptive adds spar, limpic adds thresh, simple has no extra parameter
            #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive':
                print('adaptive peakpicking')

                msidata = peakPick(msidata,
                                   window = $method.methods_conditional.window_picking,
                                   blocks = $method.methods_conditional.blocks_picking,
                                   method = '$method.methods_conditional.methods_for_picking.picking_method',
                                   SNR = $method.methods_conditional.SNR_picking_method,
                                   spar = $method.methods_conditional.methods_for_picking.spar_picking)

            #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'limpic':
                print('limpic peakpicking')

                msidata = peakPick(msidata,
                                   window = $method.methods_conditional.window_picking,
                                   blocks = $method.methods_conditional.blocks_picking,
                                   method = '$method.methods_conditional.methods_for_picking.picking_method',
                                   SNR = $method.methods_conditional.SNR_picking_method,
                                   thresh = $method.methods_conditional.methods_for_picking.tresh_picking)

            #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'simple':
                print('simple peakpicking')

                msidata = peakPick(msidata,
                                   window = $method.methods_conditional.window_picking,
                                   blocks = $method.methods_conditional.blocks_picking,
                                   method = '$method.methods_conditional.methods_for_picking.picking_method',
                                   SNR = $method.methods_conditional.SNR_picking_method)

            #end if

            ############################### QC ###########################

                ## QC numbers after this step (min m/z, max m/z, number of features,
                ## median number of intensities > 0 per spectrum, median intensity);
                ## the variable name becomes the column name in the QC table
                picked = c(
                    round(min(mz(msidata)), digits=2),
                    round(max(mz(msidata)), digits=2),
                    length(features(msidata)),
                    median(colSums(spectra(msidata)[]>0, na.rm=TRUE)),
                    round(median(spectra(msidata)[], na.rm=TRUE), digits=2))
                QC_numbers = cbind(QC_numbers, picked)
                vectorofactions = c(vectorofactions, "picked")

    ############################### Peak alignment ###########################

        #elif str( $method.methods_conditional.preprocessing_method ) == 'Peak_alignment':
            print('Peak_alignment')

            ## Step 1: choose the alignment reference
            #if str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_noref':

                ## no reference given: the dataset itself is passed as reference
                align_peak_reference = msidata

            #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_table':

                ## reference m/z values from one column of a tabular file,
                ## restricted to the m/z range of the dataset (borders included)
                align_reference_table = read.delim("$method.methods_conditional.align_ref_type.align_peaks_table", header = FALSE, stringsAsFactors = FALSE)
                align_reference_column = align_reference_table[,$method.methods_conditional.align_ref_type.align_mass_column]
                reference_in_range = align_reference_column>=min(mz(msidata)) & align_reference_column<=max(mz(msidata))
                align_peak_reference = align_reference_column[reference_in_range]
                ## fall back to a single 0 when no reference value lies in the range
                if (length(align_peak_reference) == 0){
                    align_peak_reference = 0
                }

            #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_msidata_ref':

                ## reference object stored in an RData file
                align_peak_reference = loadRData('$method.methods_conditional.align_ref_type.align_peaks_msidata')

            #end if

            ## Step 2: align with the chosen method
            #if str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'diff':
                print('diff peakalignment')

                msidata = peakAlign(msidata,
                                    method = '$method.methods_conditional.methods_for_alignment.alignment_method',
                                    diff.max = $method.methods_conditional.methods_for_alignment.value_diffalignment,
                                    units = "$method.methods_conditional.methods_for_alignment.units_diffalignment",
                                    ref = align_peak_reference)

            #elif str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'DP':
                print('DPpeakalignment')

                msidata = peakAlign(msidata,
                                    method = '$method.methods_conditional.methods_for_alignment.alignment_method',
                                    gap = $method.methods_conditional.methods_for_alignment.gap_DPalignment,
                                    ref = align_peak_reference)

            #end if

            ############################### QC ###########################

                ## QC numbers after this step (min m/z, max m/z, number of features,
                ## median number of intensities > 0 per spectrum, median intensity);
                ## the variable name becomes the column name in the QC table
                aligned = c(
                    round(min(mz(msidata)), digits=2),
                    round(max(mz(msidata)), digits=2),
                    length(features(msidata)),
                    median(colSums(spectra(msidata)[]>0, na.rm=TRUE)),
                    round(median(spectra(msidata)[], na.rm=TRUE), digits=2))
                QC_numbers = cbind(QC_numbers, aligned)
                vectorofactions = c(vectorofactions, "aligned")

    ############################### Peak filtering ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_filtering':
            print('Peak_filtering')

            ## frequency based filtering with the configured minimum frequency
            msidata = peakFilter(msidata,
                                 method='freq',
                                 freq.min = $method.methods_conditional.frequ_filtering)

            ############################### QC ###########################

                ## QC numbers after this step (min m/z, max m/z, number of features,
                ## median number of intensities > 0 per spectrum, median intensity);
                ## the variable name becomes the column name in the QC table
                filtered = c(
                    round(min(mz(msidata)), digits=2),
                    round(max(mz(msidata)), digits=2),
                    length(features(msidata)),
                    median(colSums(spectra(msidata)[]>0, na.rm=TRUE)),
                    round(median(spectra(msidata)[], na.rm=TRUE), digits=2))
                QC_numbers = cbind(QC_numbers, filtered)
                vectorofactions = c(vectorofactions, "filtered")

    ############################### Data reduction ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'Data_reduction':
            print('Data_reduction')

            #if str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'bin':
                print('bin reduction')

                ## binning with the configured width, unit and summary function
                msidata = reduceDimension(msidata,
                                          method="bin",
                                          width=$method.methods_conditional.methods_for_reduction.bin_width,
                                          units="$method.methods_conditional.methods_for_reduction.bin_units",
                                          fun=$method.methods_conditional.methods_for_reduction.bin_fun)

            #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'resample':
                print('resample reduction')

                ## resampling with the configured step size
                msidata = reduceDimension(msidata,
                                          method="resample",
                                          step=$method.methods_conditional.methods_for_reduction.resample_step)

            #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'peaks':
                print('peaks reduction')

                ## the peak reference comes from a tabular column or from an RData file
                #if str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'table':

                    reference_table = read.delim("$method.methods_conditional.methods_for_reduction.ref_type.peaks_table", header = FALSE, stringsAsFactors = FALSE)
                    reference_column = reference_table[,$method.methods_conditional.methods_for_reduction.ref_type.mass_column]
                    ## keep reference values strictly inside the m/z range of the dataset
                    reference_in_range = reference_column>min(mz(msidata)) & reference_column<max(mz(msidata))
                    peak_reference = reference_column[reference_in_range]

                #elif str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'msidata_ref':

                    peak_reference = loadRData('$method.methods_conditional.methods_for_reduction.ref_type.peaks_msidata')

                #end if

                msidata = reduceDimension(msidata,
                                          method="peaks",
                                          ref=peak_reference,
                                          type="$method.methods_conditional.methods_for_reduction.peaks_type")
            #end if
            ############################### QC ###########################

                ## QC numbers after this step (min m/z, max m/z, number of features,
                ## median number of intensities > 0 per spectrum, median intensity);
                ## the variable name becomes the column name in the QC table
                reduced = c(
                    round(min(mz(msidata)), digits=2),
                    round(max(mz(msidata)), digits=2),
                    length(features(msidata)),
                    median(colSums(spectra(msidata)[]>0, na.rm=TRUE)),
                    round(median(spectra(msidata)[], na.rm=TRUE), digits=2))
                QC_numbers = cbind(QC_numbers, reduced)
                vectorofactions = c(vectorofactions, "reduced")

        ############################### Transformation ###########################

        #elif str( $method.methods_conditional.preprocessing_method) == 'Transformation':
            print('Transformation')

            #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2':
                print('log2 transformation')

                ## zeros are set to NA before the log2 transformation;
                ## count them first so that NA values which were already present
                ## are not reported as converted zeros
                number_of_zeros = sum(spectra(msidata)[] == 0, na.rm=TRUE)
                spectra(msidata)[][spectra(msidata)[] ==0] = NA
                print(paste0("Number of 0 which were converted into NA:",number_of_zeros))
                spectra(msidata)[] = log2(spectra(msidata)[])

            #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt':
                print('squareroot transformation')

                spectra(msidata)[] = sqrt(spectra(msidata)[])

           #end if

            ############################### QC ###########################

                ## QC numbers after this step, in the row order of QC_numbers:
                ## min m/z, max m/z, number of features, median number of
                ## intensities > 0 per spectrum, median intensity
                maxfeatures = length(features(msidata))
                medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE))
                medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
                minmz = round(min(mz(msidata)), digits=2)
                maxmz = round(max(mz(msidata)), digits=2)
                transformed = c(minmz, maxmz,maxfeatures, medianpeaks, medint)
                ## the variable name becomes the column name in the QC table
                QC_numbers= cbind(QC_numbers, transformed)
                vectorofactions = append(vectorofactions, "transformed")

            #end if
    #end for

    ############# Outputs: summary matrix, RData, tabular and QC report ############
    ################################################################################
    ## optional summarized matrix
        print('Summarized matrix')

        ## Optional annotation from a tabular file: it defines the pixel groups
        ## over which mean, median or sd are calculated (without annotation all
        ## pixels are treated as one sample)

        #if str($tabular_annotation.load_annotation) == 'yes_annotation':

            ## keep only the x, y and annotation columns of the tabular file
            annotation_columns = c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)
            annotation_input = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)[,annotation_columns]
            colnames(annotation_input) = c("x", "y", "annotation")

            ## remember the pixel order next to the coordinates, because merge reorders the rows
            msidata_coordinates = cbind(coord(msidata)[,1:2], pixel_index = c(1:ncol(msidata)))

            ## pixels without a matching annotation row are kept and labelled "NA"
            merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE)
            merged_annotation[is.na(merged_annotation)] = "NA"

            ## restore the original pixel order before storing the annotation as factor
            merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
            msidata\$annotation = as.factor(merged_annotation\$annotation)

        #end if

    ## Helper: calculate one summary statistic per m/z feature and write it as
    ## tab separated table with the m/z values as row names.
    ##   summary_fun: function applied to the intensities of each feature
    ##                (called with na.rm=TRUE), e.g. mean, median or sd
    ##   output_file: path of the table that is written
    ## With annotation levels the table has one column per level, otherwise one
    ## single column that is named after the input file.
    write_summary_matrix = function(summary_fun, output_file){
        annotation_groups = levels(msidata\$annotation)
        if (!is.null(annotation_groups)){
            ## preallocate the result and fill it column by column
            sample_matrix = matrix(NA_real_, nrow=nrow(msidata), ncol=length(annotation_groups))
            for (group_index in seq_along(annotation_groups)){
                subsample_pixels = msidata[,msidata\$annotation == annotation_groups[group_index]]
                sample_matrix[,group_index] = apply(spectra(subsample_pixels)[],1,summary_fun, na.rm=TRUE)
            }
            rownames(sample_matrix) = mz(msidata)
            colnames(sample_matrix) = annotation_groups
        }else{
            sample_matrix = as.data.frame(apply(spectra(msidata)[],1,summary_fun, na.rm=TRUE))
            rownames(sample_matrix) = mz(msidata)
            colnames(sample_matrix) = "$infile.display_name"
        }
        ## col.names=NA writes an empty header cell above the row names
        write.table(sample_matrix, file=output_file, quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
    }

    #if "mean" in str($summary_type).split(","):
        print("mean matrix")
        write_summary_matrix(mean, "$summarized_output_mean")
    #end if

    #if "median" in str($summary_type).split(","):
        print("median matrix")
        write_summary_matrix(median, "$summarized_output_median")
    #end if

    #if "sd" in str($summary_type).split(","):
        print("sd matrix")
        write_summary_matrix(sd, "$summarized_output_sd")
    #end if
    print(paste0("Number of NA in output file: ",sum(is.na(spectra(msidata)[]))))

    ## store the preprocessed dataset as RData file
    save(msidata, file="$msidata_preprocessed")

    ## optional intensity matrix as tabular file
    #if $output_matrix:

        if (length(features(msidata)) == 0 || length(pixels(msidata)) == 0){
            print("file has no features or pixels left")
        }else{
            ## first column holds the m/z values, first row holds the pixel names
            spectramatrix = cbind(mz(msidata), spectra(msidata)[])
            spectramatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix)
            write.table(spectramatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
        }
    #end if

    ## save QC report: first page shows the QC table with one row per
    ## processing stage, second page (only with annotation) shows the pixel
    ## positions coloured by annotation group

    pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12)
    ## empty plot that only carries the title of the table page
    plot(0,type='n',axes=FALSE,ann=FALSE)
    title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name"))
    rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median\n# peaks", "median\nintensity")
    grid.table(t(QC_numbers))

    #if str($tabular_annotation.load_annotation) == 'yes_annotation':

        ## the more annotation groups a file has the smaller will be the legend;
        ## the ranges are contiguous so that exactly 20, 40 or 60 groups do not
        ## fall through to the smallest size
        number_combined = length(levels(msidata\$annotation))
        if (number_combined<20){
            legend_size = 10
        }else if (number_combined<40){
            legend_size = 9
        }else if (number_combined<60){
            legend_size = 8
        }else if (number_combined<100){
            legend_size = 7
        }else{
            legend_size = 6
        }

        ## x and y position of every pixel together with its annotation
        position_df = cbind(coord(msidata)[,1:2], msidata\$annotation)
        colnames(position_df)[3] = "sample_name"

        combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+
               geom_tile() +
               coord_fixed()+
               ggtitle("Spatial orientation of annotated data")+
               theme_bw()+
               theme(plot.title = element_text(hjust = 0.5))+
               theme(text=element_text(family="ArialMT", face="bold", size=12))+
               theme(legend.position="bottom",legend.direction="vertical")+
               theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+
               guides(fill=guide_legend(ncol=5,byrow=TRUE))

        ## label every group at its mean position; the label is the part of the
        ## group name in front of the first underscore
        coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean)
        coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name)
        ## seq_len avoids the 1:0 pitfall when there is no label row
        for(file_count in seq_len(nrow(coord_labels)))
            {combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],
            y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))}

        print(combine_plot)
    #end if

    dev.off()

}else{
    ## no intensity above zero was found in the input: nothing is processed or written
    print("inputfile has no intensities > 0")
}

    ]]></configfile>
    </configfiles>
    <inputs>
        <param name="infile" type="data" format="imzml,rdata,analyze75"
            label="MSI data as imzml, analyze7.5 or Cardinal MSImageSet saved as RData"
            help="load imzml and ibd file by uploading composite datatype imzml"/>
        <conditional name="processed_cond">
            <param name="processed_file" type="select" label="Is the input file a processed imzML file ">
                <option value="no_processed" selected="True">not a processed imzML</option>
                <option value="processed">processed imzML</option>
            </param>
            <when value="no_processed"/>
            <when value="processed">
                <param name="accuracy" type="float" value="50" label="Mass accuracy to which the m/z values will be binned" help="This should be set to the native accuracy of the mass spectrometer, if known"/>
                <param name="units" display="radio" type="select" label="Unit of the mass accuracy" help="either m/z or ppm">
                    <option value="mz" >mz</option>
                    <option value="ppm" selected="True" >ppm</option>
                </param>
            </when>
        </conditional>
        <repeat name="methods" title="Preprocessing" min="1" max="50">
            <conditional name="methods_conditional">
                <param name="preprocessing_method" type="select" label="Select the preprocessing methods you want to apply">
                    <option value="Normalization" selected="True">Normalization to TIC</option>
                    <option value="Baseline_reduction">Baseline Reduction</option>
                    <option value="Smoothing">Peak smoothing</option>
                    <option value="Peak_picking">Peak picking</option>
                    <option value="Peak_alignment">Peak alignment</option>
                    <option value="Peak_filtering">Peak filtering</option>
                    <option value="Data_reduction">Data reduction</option>
                    <option value="Transformation">Transformation</option>
                </param>
                <when value="Normalization"/>
                <when value="Baseline_reduction">
                    <param name="blocks_baseline" type="integer" value="50"
                        label="Blocks"/>
                </when>
                <when value="Smoothing">
                    <conditional name="methods_for_smoothing">
                        <param name="smoothing_method" type="select" label="Smoothing method">
                            <option value="gaussian" selected="True">gaussian</option>
                            <option value="sgolay">Savitzky-Golay</option>
                            <option value="ma">moving average</option>
                        </param>
                        <when value="gaussian">
                            <param name="sd_gaussian" type="float" value="4"
                                   label="The standard deviation for the Gaussian kernel (window/sd)"/>
                        </when>
                        <when value="sgolay">
                            <param name="order_of_filters" type="integer" value="3"
                                   label="The order of the smoothing filter"/>
                        </when>
                        <when value="ma">
                            <param name="coefficients_ma_filter" type="integer" value="1"
                                   label="The coefficients for the moving average filter"/>
                        </when>
                    </conditional>
                    <param name="window_smoothing" type="integer" value="9"
                                label="Window size"/>
                </when>
                <when value="Peak_picking">
                    <param name="SNR_picking_method" type="integer" value="3"
                        label="Signal to noise ratio"
                        help="The minimal signal to noise ratio for peaks to be considered as a valid peak."/>
                    <param name="blocks_picking" type="integer" value="100" label = "Number of blocks"
                        help="Number of blocks in which to divide mass spectrum to calculate noise"/>
                    <param name="window_picking" type="integer" value="5" label= "Window size" help="Window width for seeking local maxima"/>
                    <conditional name="methods_for_picking">
                        <param name="picking_method" type="select" label="Peak picking method" help="only simple works for processed imzML files">
                            <option value="adaptive" selected="True">adaptive</option>
                            <option value="limpic">limpic</option>
                            <option value="simple">simple</option>
                        </param>
                        <when value="adaptive">
                            <param name="spar_picking" type="float" value="1.0"
                                label="Spar value" 
                                help = "Smoothing parameter for the spline smoothing 
                                  applied to the spectrum in order to decide the cutoffs 
                                  for throwing away false noise spikes that might occur inside peaks"/>
                        </when>
                        <when value="limpic">
                            <param name="tresh_picking" type="float" value="0.75"
                                label="thresh value" help="The thresholding quantile to use when comparing slopes in order to throw away peaks that are too flat"/>
                        </when> 
                        <when value="simple"/>
                    </conditional>
                </when>
                <!-- Peak alignment: choose the alignment algorithm and, optionally, a set of
                     reference m/z values (tabular file or a previously processed msidata file). -->
                <when value="Peak_alignment">
                    <conditional name="methods_for_alignment">
                        <param name="alignment_method" type="select" label="Alignment method">
                            <option value="diff" selected="True">diff</option>
                            <option value="DP">DP</option>
                        </param>
                        <when value="diff">
                            <param name="value_diffalignment" type="integer" value="200"
                                   label="diff.max" help="Peaks that differ less than this value will be aligned together"/>
                            <param name="units_diffalignment" type="select" display="radio" optional="False"
                                   label="units">
                                <option value="ppm" selected="True">ppm</option>
                                <!-- shown to the user as m/z, submitted as "Da" -->
                                <option value="Da">m/z</option>
                            </param>
                        </when>
                        <when value="DP">
                            <param name="gap_DPalignment" type="integer" value="0"
                                   label="Gap" help="The gap penalty for the dynamic programming sequence alignment"/>
                        </when>
                    </conditional>
                    <conditional name="align_ref_type">
                        <param name="align_reference_datatype" type="select" label="Choose reference">
                            <option value="align_noref" selected="True">no reference</option>
                            <option value="align_table">tabular file as reference</option>
                            <option value="align_msidata_ref">msidata file as reference</option>
                        </param>
                        <when value="align_noref"/>
                        <when value="align_table">
                            <param name="align_peaks_table" type="data" format="tabular"
                                   label="Reference m/z values to use for alignment - only these will be kept" help="One column with m/z values (without empty cells or letters)"/>
                            <param name="align_mass_column" data_ref="align_peaks_table" label="Column with reference m/z" type="data_column"/>
                        </when>
                        <when value="align_msidata_ref">
                            <!-- format is a comma-separated datatype list, so it must not end in a comma -->
                            <param name="align_peaks_msidata" type="data" format="rdata" label="Picked and aligned Cardinal MSImageSet saved as RData"/>
                        </when>
                    </conditional>
                </when>
                <!-- Peak filtering: a single minimum-frequency cutoff -->
                <when value="Peak_filtering">
                    <param name="frequ_filtering" type="integer" value="1000" label="Freq.min"
                           help="Peaks that occur in the dataset fewer times than this will be removed. Number should be between 1 (no filtering) and number of spectra (pixel)"/>
                </when>
                <!-- Data reduction: binning, resampling, or reduction to a list of reference peaks
                     (taken from a tabular file or from a previously processed msidata file). -->
                <when value="Data_reduction">
                    <conditional name="methods_for_reduction">
                        <param name="reduction_method" type="select" label="Reduction method">
                            <option value="bin" selected="True">bin</option>
                            <option value="resample">resample</option>
                            <option value="peaks">peaks</option>
                        </param>
                        <when value="bin">
                            <param name="bin_width" type="float" value="1"
                                   label="The width of a bin in m/z or ppm" help="Width must be greater than range of m/z values divided by number of m/z features"/>
                            <param name="bin_units" type="select" display="radio"
                                   label="Unit for bin">
                                <option value="mz" selected="True">mz</option>
                                <option value="ppm">ppm</option>
                            </param>
                            <param name="bin_fun" type="select" display="radio"
                                   label="Calculate sum or mean intensity for ions of the same bin">
                                <option value="mean" selected="True">mean</option>
                                <option value="sum">sum</option>
                            </param>
                        </when>
                        <when value="resample">
                            <param name="resample_step" type="float" value="1"
                                   label="The step size in m/z" help="Step size must be greater than range of m/z values divided by number of m/z features"/>
                        </when>
                        <when value="peaks">
                            <param name="peaks_type" type="select" display="radio"
                                   label="Should the peak height or area under the curve be taken as the intensity value?">
                                <option value="height" selected="True">height</option>
                                <option value="area">area</option>
                            </param>
                            <conditional name="ref_type">
                                <param name="reference_datatype" type="select" label="Choose reference datatype">
                                    <option value="table" selected="True">tabular file</option>
                                    <option value="msidata_ref">msidata file</option>
                                </param>
                                <when value="table">
                                    <param name="peaks_table" type="data" format="tabular"
                                           label="Reference m/z values to use to reduce the dimension" help="One column with m/z values (without empty cells or letters, m/z outside m/z range are not used for filtering)"/>
                                    <param name="mass_column" data_ref="peaks_table" label="Column with reference m/z" type="data_column"/>
                                </when>
                                <when value="msidata_ref">
                                    <!-- format is a comma-separated datatype list, so it must not end in a comma -->
                                    <param name="peaks_msidata" type="data" format="rdata" label="Picked and aligned Cardinal MSImageSet saved as RData"/>
                                </when>
                            </conditional>
                        </when>
                    </conditional>
                </when>
                <!-- Intensity transformation: neither choice takes further options -->
                <when value="Transformation">
                    <conditional name="transf_conditional">
                        <param name="trans_type" type="select"
                               label="Choose which intensity transformation you want to apply"
                               help="logarithm base 2 (log2) or squareroot (sqrt)">
                            <option value="log2" selected="True">log2</option>
                            <option value="sqrt">sqrt</option>
                        </param>
                        <when value="log2"/>
                        <when value="sqrt"/>
                    </conditional>
                </when>
            </conditional>
        </repeat>
        <!-- Optional summary statistics (multi-select checkboxes). Each selected value enables the
             matching summarized_output_* dataset through the filters in the outputs section. -->
        <param name="summary_type" type="select" display="checkboxes" multiple="true" label="Summarize all pixels of a sample and calculate the mean, median or standard deviation">
            <option value="mean">mean</option>
            <option value="median">median</option>
            <option value="sd">standard deviation</option>
        </param>
        <!-- Summaries are calculated either over all pixels or per annotation group taken
             from a tabular file (x column, y column, annotation column). -->
        <conditional name="tabular_annotation">
            <param name="load_annotation" type="select" label="Use pixel annotations from tabular file to summarize pixel">
                <option value="no_annotation" selected="True">summarize over all pixels</option>
                <option value="yes_annotation">summarize over categories from annotation file</option>
            </param>
            <when value="yes_annotation">
                <param name="annotation_file" type="data" format="tabular"
                       label="Use annotations from tabular file to summarize pixel"
                       help="Tabular file with three columns: x values, y values and pixel annotations"/>
                <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/>
                <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/>
                <param name="column_names" data_ref="annotation_file" label="Column with pixel annotations" type="data_column"/>
                <param name="tabular_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
            </when>
            <when value="no_annotation"/>
        </conditional>
        <!-- Switch for the optional "matrixasoutput" dataset (used by that output's filter) -->
        <param name="output_matrix" type="boolean" label="Intensity matrix output"/>
    </inputs>
    <outputs>
        <!-- always declared: preprocessed data (rdata) and the QC report collected from Preprocessing.pdf -->
        <data name="msidata_preprocessed" format="rdata" label="$infile.display_name preprocessed"/>
        <data name="QC_overview" format="pdf" from_work_dir="Preprocessing.pdf" label="$infile.display_name preprocessed_QC"/>
        <!-- one summary table per statistic selected in summary_type -->
        <data name="summarized_output_mean" format="tabular" label="$infile.display_name mean_matrix">
            <filter>summary_type and "mean" in summary_type</filter>
        </data>
        <data name="summarized_output_median" format="tabular" label="$infile.display_name median_matrix">
            <filter>summary_type and "median" in summary_type</filter>
        </data>
        <data name="summarized_output_sd" format="tabular" label="$infile.display_name sd_matrix">
            <filter>summary_type and "sd" in summary_type</filter>
        </data>
        <!-- intensity matrix, only when output_matrix is switched on -->
        <data name="matrixasoutput" format="tabular" label="$infile.display_name preprocessed_matrix">
            <filter>output_matrix</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: imzML input run through normalization, gaussian smoothing, adaptive peak picking,
             diff alignment, peak filtering and sqrt transformation; intensity matrix requested -->
        <test expect_num_outputs="3">
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML"/>
                <composite_data value="Example_Continuous.ibd"/>
            </param>
            <!-- step 1: normalization -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Normalization"/>
                </conditional>
            </repeat>
            <!-- step 2: smoothing -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Smoothing"/>
                    <conditional name="methods_for_smoothing">
                        <param name="smoothing_method" value="gaussian"/>
                    </conditional>
                </conditional>
            </repeat>
            <!-- step 3: peak picking -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Peak_picking"/>
                    <conditional name="methods_for_picking">
                        <param name="picking_method" value="adaptive"/>
                    </conditional>
                    <param name="blocks_picking" value="3"/>
                    <param name="window_picking" value="3"/>
                </conditional>
            </repeat>
            <!-- step 4: peak alignment -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Peak_alignment"/>
                    <conditional name="methods_for_alignment">
                        <param name="alignment_method" value="diff"/>
                    </conditional>
                </conditional>
            </repeat>
            <!-- step 5: peak filtering -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Peak_filtering"/>
                    <param name="frequ_filtering" value="2"/>
                </conditional>
            </repeat>
            <!-- step 6: transformation -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Transformation"/>
                    <conditional name="transf_conditional">
                        <param name="trans_type" value="sqrt"/>
                    </conditional>
                </conditional>
            </repeat>
            <conditional name="tabular_annotation">
                <param name="load_annotation" value="no_annotation"/>
            </conditional>
            <param name="output_matrix" value="True"/>
            <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/>
            <output name="matrixasoutput" file="preprocessing_results1.txt"/>
            <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/>
        </test>
        <!-- Test 2: RData input; adaptive peak picking and DP alignment, then median and sd
             summaries per annotation group from a tabular annotation file -->
        <test expect_num_outputs="4">
            <param name="infile" value="123_combined.RData" ftype="rdata"/>
            <!-- step 1: peak picking -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Peak_picking"/>
                    <param name="blocks_picking" value="3"/>
                    <param name="window_picking" value="5"/>
                    <param name="SNR_picking_method" value="2"/>
                    <conditional name="methods_for_picking">
                        <param name="picking_method" value="adaptive"/>
                    </conditional>
                </conditional>
            </repeat>
            <!-- step 2: peak alignment -->
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Peak_alignment"/>
                    <conditional name="methods_for_alignment">
                        <param name="alignment_method" value="DP"/>
                    </conditional>
                </conditional>
            </repeat>
            <conditional name="tabular_annotation">
                <param name="load_annotation" value="yes_annotation"/>
                <param name="annotation_file" value="pixel_annotations.tabular"/>
                <param name="column_x" value="1"/>
                <param name="column_y" value="2"/>
                <param name="column_names" value="3"/>
                <param name="tabular_header" value="FALSE"/>
            </conditional>
            <param name="summary_type" value="median,sd"/>
            <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/>
            <output name="summarized_output_median" file="preprocessing_median2.txt" lines_diff="2"/>
            <output name="summarized_output_sd" file="preprocessing_sd2.txt" lines_diff="2"/>
            <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/>
        </test>
        <!-- Test 3: Analyze7.5 input; normalization, limpic peak picking and diff alignment,
             with the mean summary over all pixels -->
        <test expect_num_outputs="3">
            <param name="infile" value="" ftype="analyze75">
                <composite_data value="Analyze75.hdr"/>
                <composite_data value="Analyze75.img"/>
                <composite_data value="Analyze75.t2m"/>
            </param>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Normalization"/>
                </conditional>
            </repeat>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Peak_picking"/>
                    <param name="blocks_picking" value="100"/>
                    <param name="window_picking" value="5"/>
                    <!-- picking_method belongs to the methods_for_picking conditional and is nested
                         accordingly, as in the other peak picking tests.
                         NOTE(review): if the reference files were generated while this value was not
                         applied, they may need to be regenerated - TODO confirm -->
                    <conditional name="methods_for_picking">
                        <param name="picking_method" value="limpic"/>
                    </conditional>
                </conditional>
            </repeat>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Peak_alignment"/>
                    <conditional name="methods_for_alignment">
                        <param name="alignment_method" value="diff"/>
                    </conditional>
                </conditional>
            </repeat>
            <param name="summary_type" value="mean"/>
            <conditional name="tabular_annotation">
                <param name="load_annotation" value="no_annotation"/>
            </conditional>
            <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/>
            <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/>
            <output name="summarized_output_mean" file="preprocessing_mean3.txt" lines_diff="2"/>
        </test>
        <!-- Test 4: Analyze7.5 input; normalization followed by data reduction through binning,
             with the intensity matrix requested -->
        <test expect_num_outputs="3">
            <param name="infile" value="" ftype="analyze75">
                <composite_data value="Analyze75.hdr"/>
                <composite_data value="Analyze75.img"/>
                <composite_data value="Analyze75.t2m"/>
            </param>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Normalization"/>
                </conditional>
            </repeat>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Data_reduction"/>
                    <!-- bin_width is declared inside the methods_for_reduction conditional (case "bin"),
                         so it is nested here and the reduction method is stated explicitly.
                         NOTE(review): if the reference files were generated while this value was not
                         applied, they may need to be regenerated - TODO confirm -->
                    <conditional name="methods_for_reduction">
                        <param name="reduction_method" value="bin"/>
                        <param name="bin_width" value="0.1"/>
                    </conditional>
                </conditional>
            </repeat>
            <conditional name="tabular_annotation">
                <param name="load_annotation" value="no_annotation"/>
            </conditional>
            <param name="output_matrix" value="True"/>
            <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/>
            <output name="matrixasoutput" file="preprocessing_results4.txt"/>
            <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/>
        </test>
        <!-- Test 5: imzML input; data reduction through resampling only -->
        <test expect_num_outputs="2">
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML"/>
                <composite_data value="Example_Continuous.ibd"/>
            </param>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="preprocessing_method" value="Data_reduction"/>
                    <conditional name="methods_for_reduction">
                        <param name="reduction_method" value="resample"/>
                        <!-- the input is named resample_step; the former name step_width matched
                             no tool parameter.
                             NOTE(review): if the reference files were generated with the default
                             step size, they may need to be regenerated - TODO confirm -->
                        <param name="resample_step" value="0.1"/>
                    </conditional>
                </conditional>
            </repeat>
            <conditional name="tabular_annotation">
                <param name="load_annotation" value="no_annotation"/>
            </conditional>
            <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size"/>
            <output name="QC_overview" file="preprocessing_results5.pdf" compare="sim_size"/>
        </test>
    </tests>
    <help>
        <![CDATA[

Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org/>`_

This tool provides multiple Cardinal functions to preprocess mass spectrometry imaging data.

Input data: 3 types of MSI data can be used:

- imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
- Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
- Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
- Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates which will be automatically removed before peak picking.
- optional: tabular file with pixel annotations: x and y values in separate columns and the corresponding annotation in a third column

Options:

- Normalization: Normalization of intensities to total ion current (TIC)
- Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets)
- Smoothing: Smoothing of the peaks reduces noise and improves peak detection
- Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards)
- Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value
- Peak filtering: works only on centroided data (after peak picking and alignment or data reduction with peak filtering), removes peaks that occur only in a small proportion of pixels. If not sure which cutoff to choose, run qualitycontrol first and decide according to the zero value plot.
- Data reduction: binning, resampling or peak filtering to reduce data
- Transformation: log2 or squareroot transformation of all intensities; when using log2 transformation zero intensities will become NA, this can lead to compatibility problems. 


Output: 

- preprocessed MSI data, saved as .RData
- pdf with key values after each processing step, in case of loaded annotations file overview plot of pixel annotations
- optional: intensity matrix as tabular file (m/z in rows and pixel in columns, filled with intensity values)
- optional: summarized intensity matrix: mean, median or standard deviation for each m/z feature; in case pixel annotations are provided the intensity values are summarized for each pixel group

Tip: 

- Peak alignment works only after peak picking
- Peak filtering works only on centroided data (peak picking and alignment or Data reduction peaks)

        ]]>
    </help>
    <!-- Publication to cite for this tool, referenced by its DOI -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btv146</citation>
    </citations>
</tool>