view maldi_quant_peakdetection.xml @ 7:160538a890a6 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit f1e1cd260ef2884d0ba12e2b614df3c72d0934dc
author galaxyp
date Sat, 04 Mar 2023 19:14:04 +0000
parents d286ff4600dd
children
line wrap: on
line source

<tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.0">
    <!-- One-line summary of what the tool does -->
    <description>
        Peak detection, binning and filtering for mass-spectrometry imaging data
    </description>
    <!-- Shared definitions are pulled in from maldi_macros.xml; the "requirements" macro expanded
         below is presumably defined there (the macro file is not part of this view) -->
    <macros>
        <import>maldi_macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code">
    <![CDATA[
        ## Stage the composite input files under the fixed names that the R script reads
        #if $infile.ext == 'imzml'
            cp '${infile.extra_files_path}/imzml' infile.imzML &&
            cp '${infile.extra_files_path}/ibd' infile.ibd &&
        #elif $infile.ext == 'analyze75'
            cp '${infile.extra_files_path}/hdr' infile.hdr &&
            cp '${infile.extra_files_path}/img' infile.img &&
            cp '${infile.extra_files_path}/t2m' infile.t2m &&
        #end if
        ## Print the generated R script to stdout, then run it
        cat '${maldi_quant_peak_detection}' &&
        Rscript '${maldi_quant_peak_detection}' &&
        ## -p: do not fail when the extra-files directory already exists
        mkdir -p '$outfile_imzml.files_path' &&
        ## The R script exports out.imzMl only when spectra were not summarized, so a failed mv
        ## is tolerated. The grouping keeps "|| true" from masking a failure of an earlier command.
        (mv ./out.imzMl "${os.path.join($outfile_imzml.files_path, 'imzml')}" || true) &&
        (mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" || true) &&
        ## The primary output file lists the content of the extra-files directory
        echo "imzML file:" > '$outfile_imzml' &&
        ls -l '$outfile_imzml.files_path' >> '$outfile_imzml'
    ]]>
    </command>
    <configfiles>
        <configfile name="maldi_quant_peak_detection"><![CDATA[

@R_IMPORTS@

## Data import. Depending on the input this section defines:
##   peaks            - list of MassPeaks objects (centroided imzML or tabular peak list)
##   maldi_data       - list of spectra (profile imzML)
##   pixelnames       - one name per spectrum, in the same order as the spectra
##   coordinates_info - x, y and running pixel index (imzML only), used when annotations are merged
#if $restriction_conditional.restriction == 'restrict':

    print('Reading mask region')

    ## Import only the pixels listed in the coordinates file
    coordinate_matrix = read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE)
    coordinate_matrix = coordinate_matrix[,c($restriction_conditional.column_x, $restriction_conditional.column_y)]
    #if str($centroids) == "TRUE"
        peaks <- importImzMl('infile.imzML',
                     coordinates = as.matrix(coordinate_matrix), centroided = $centroids)
        pixelnames = paste("xy", coordinates(peaks)[,1],coordinates(peaks)[,2], sep="_")
        ## needed by the annotation merge further down; was only defined when the entire file was read
        coordinates_info = cbind(coordinates(peaks)[,1:2], c(1:length(peaks)))
    #else
        maldi_data <- importImzMl('infile.imzML',
                     coordinates = as.matrix(coordinate_matrix), centroided = $centroids)
        pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_")
        ## needed by the annotation merge further down; was only defined when the entire file was read
        coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data)))
    #end if

#else:

    print('Reading entire file')
    ## Import imzML file

    #if $infile.ext == 'imzml'
        print('imzML file')
        #if str($centroids) == "TRUE"
            peaks <- importImzMl('infile.imzML', centroided = $centroids)
            pixelnames = paste("xy", coordinates(peaks)[,1],coordinates(peaks)[,2], sep="_")
            coordinates_info = cbind(coordinates(peaks)[,1:2], c(1:length(peaks)))
        #else
            maldi_data <- importImzMl('infile.imzML', centroided = $centroids)
            pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_")
            coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data)))
        #end if


    #elif $infile.ext == 'tabular'
        print('tabular file')
        #set $centroids = "TRUE" ## will be used in some if conditions
        peak_tabular = read.delim("$infile", header = TRUE, stringsAsFactors = FALSE)
        pixelnames = unique(peak_tabular\$spectrum)
        ## split() orders its groups by factor level; fixing the levels to the order of first
        ## appearance guarantees that peak_list[[i]] holds the peaks of pixelnames[i]
        peak_list = split(peak_tabular, f = factor(peak_tabular\$spectrum, levels = pixelnames))

        peaks = list()
        for (spectra in 1:length(peak_list))
        {
            single_peaks = createMassPeaks(peak_list[[spectra]]\$mass, peak_list[[spectra]]\$intensity, snr=peak_list[[spectra]]\$snr)
            peaks[[spectra]] = single_peaks
        }

    #end if
#end if

## summarized_spectra is switched to TRUE when spectra are averaged per annotation group
summarized_spectra = FALSE

## Quality control plots during peak detection
pdf("peaks_qc_plot.pdf", fonts = "Times", pointsize = 12)
plot(0,type='n',axes=FALSE,ann=FALSE)

## The name of the dataset in the Galaxy history is used as title of the first page.
## The name is user controlled and ends up inside a double quoted R string, therefore
## double quotes and backslashes (chr(92)) are replaced so that it cannot terminate the string.
#set $filename = str($infile.display_name).replace(chr(92), '_').replace('"', '_')

title(main=paste("$filename"))

## Plot four randomly chosen input spectra on one 2x2 page.
## random_spectra is reused by the later preprocessing steps to plot the same spectra again.
## The centroid check uses the same string comparison as the rest of the script.
#if str($centroids) == "TRUE"
    ## in case the dataset has less than 4 spectra, the same spectra are allowed to be sampled
    random_spectra = sample(1:length(peaks), size=4, replace=length(peaks)<4)
    par(mfrow = c(2, 2), oma=c(0,0,2,0))
    for (random_sample in random_spectra){
        plot(peaks[[random_sample]],sub="", main=paste0("spectrum ", pixelnames[random_sample]))
    }
    title("Input spectra", outer=TRUE, line=0)

#else
    ## in case the dataset has less than 4 spectra, the same spectra are allowed to be sampled
    random_spectra = sample(1:length(maldi_data), size=4, replace=length(maldi_data)<4)
    par(mfrow = c(2, 2), oma=c(0,0,2,0))
    for (random_sample in random_spectra){
        plot(maldi_data[[random_sample]],sub="", main=paste0("spectrum ", pixelnames[random_sample]))
    }
    title("Input spectra", outer=TRUE, line=0)
#end if

## QC numbers for input file
## The same statistics are taken from peaks (centroided input) or from maldi_data (profile
## input); only the name of the R object differs, so it is chosen once on the template level.
#if str($centroids) == "TRUE"
    #set $qc_source = "peaks"
#else
    #set $qc_source = "maldi_data"
#end if
pixel_number = length(${qc_source})
## all m/z values and all intensities of the input, collected once
input_mz_values = unlist(lapply(${qc_source}, mass))
input_int_values = unlist(lapply(${qc_source}, intensity))
minmz = round(min(input_mz_values), digits=4)
maxmz = round(max(input_mz_values), digits=4)
mean_features = round(length(input_mz_values)/length(${qc_source}), digits=2)
medint = round(median(input_int_values), digits=2)
number_features = length(unique(input_mz_values))
number_spectra = length(${qc_source})
## the helper vectors can be large, drop them as soon as the numbers are computed
rm(input_mz_values, input_int_values)
## first column of the QC table; every preprocessing step appends one more column
inputdata = c(minmz, maxmz, number_features, mean_features, medint, number_spectra)
QC_numbers = data.frame(inputdata = inputdata)
vectorofactions = "inputdata"

#if str($tabular_annotation.load_annotation) == 'yes_annotation':

    ## load the annotation table and keep only the x, y and annotation columns (in this order)
    annotation_table = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
    annotation_input = annotation_table[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
    ## annotations are handled as character strings
    annotation_input[,3] <- as.character(annotation_input[,3])
    ## default column names, "x" and "y" are the merge keys below
    colnames(annotation_input) = c("x", "y", "annotation")

    ## attach the annotation to the pixel coordinates of the MSI data, pixels without annotation are kept
    colnames(coordinates_info)[3] = "pixel_index"
    merged_annotation = merge(coordinates_info, annotation_input, by=c("x", "y"), all.x=TRUE)
    merged_annotation[is.na(merged_annotation)] = "NA"
    ## merge() reorders the rows, restore the pixel order of the spectra
    pixel_order = order(merged_annotation[["pixel_index"]])
    merged_annotation = merged_annotation[pixel_order,]
    ## one annotation label per spectrum, used for averaging and group wise filtering
    samples = as.factor(merged_annotation[["annotation"]])

    ## print annotation overview into PDF output
    annotation_plot = ggplot(merged_annotation, aes(x=x, y=y, fill=annotation)) +
        geom_tile() +
        coord_fixed() +
        ggtitle("Spatial orientation of annotated data") +
        theme_bw() +
        theme(plot.title = element_text(hjust = 0.5),
              text = element_text(family="ArialMT", face="bold", size=12),
              legend.position = "bottom",
              legend.direction = "vertical",
              legend.key.size = unit(0.2, "line"),
              legend.text = element_text(size = 8)) +
        guides(fill=guide_legend(ncol=4,byrow=TRUE))

    print(annotation_plot)

#end if

#################### Preprocessing methods #####################################

#for $method in $methods:

    #if str( $method.methods_conditional.method ) == 'Peak_detection':
        print('peak detection')
        ## Peak detection on the profile spectra in maldi_data; the result is stored in peaks

        #if $method.methods_conditional.use_annotations:
            ## use the average spectrum of every annotation group for peak picking
            maldi_data <- averageMassSpectra(maldi_data, labels=samples,method="mean")
            pixelnames = levels(samples)
            ## averaged spectra are not exported as imzML at the end of the script
            summarized_spectra = TRUE
            ## the spectra chosen before no longer exist, choose new ones among the averaged spectra
            random_spectra = sample(1:length(maldi_data), 4, replace=TRUE)
        #end if

        peaks <- detectPeaks(maldi_data, method="$method.methods_conditional.peak_method",
                  halfWindowSize=$method.methods_conditional.halfWindowSize,SNR=$method.methods_conditional.snr)

        ## QC plot and numbers
        ## plot old spectra with the scaled noise estimate in blue and picked peaks in green
        par(mfrow = c(2, 2), oma=c(0,0,2,0))
        for (random_sample in random_spectra){
            noise = estimateNoise(maldi_data[[random_sample]], method= "$method.methods_conditional.peak_method")
            plot(maldi_data[[random_sample]], sub="", main=paste0("spectrum ", pixelnames[random_sample]))
            lines(noise[,1], noise[,2]*$method.methods_conditional.snr, col="blue")
            points(peaks[[random_sample]], col="green", pch=20)
        }
        ## one common title per page, set after the four panels are drawn
        title("S/N in blue and picked peaks in green", outer=TRUE, line=0)

        ## plot new spectrum
        par(mfrow = c(2, 2), oma=c(0,0,2,0))
        for (random_sample in random_spectra){
            plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", pixelnames[random_sample]))
        }
        title("Picked peaks", outer=TRUE, line=0)

        ## QC numbers after peak picking, appended as new column to the QC table
        pixel_number = length(peaks)
        minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
        maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
        mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
        medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
        number_features = length(unique(unlist(lapply(peaks,mass))))
        number_spectra = length(peaks)
        peaks_picked = c(minmz, maxmz,number_features,mean_features,  medint, number_spectra)
        QC_numbers= cbind(QC_numbers, peaks_picked)
        vectorofactions = append(vectorofactions, "peaks_picked")

        ## write the intensity matrix only when at least one spectrum still contains peaks
        if (length(peaks[!sapply(peaks, isEmpty)])>0){
            ## the matrix is computed exactly once; for profile imzML input the spectra are
            ## passed as second argument, which replaced the plain matrix in any case
            #if $infile.ext == 'imzml' and str($centroids) == "FALSE"
                featureMatrix <- intensityMatrix(peaks, maldi_data)
            #else
                featureMatrix <- intensityMatrix(peaks)
            #end if
            ## output layout: first row spectrum names (labelled "mz"), then one row per feature
            featureMatrix2 =cbind(pixelnames, featureMatrix)
            colnames(featureMatrix2)[1] = c("mz")
            featureMatrix2 = t(featureMatrix2)
            write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
        }else{print("There are no spectra with peaks left")}


    #elif str( $method.methods_conditional.method ) == 'monoisotopic_peaks':

        print('monoisotopic peaks')
        ## Reduce the picked peaks to monoisotopic peaks

        ## keep peaks to plot them with monoisotopic peaks
        picked_peaks = peaks

        peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor, 
                tolerance=$method.methods_conditional.tolerance,
                distance=c($method.methods_conditional.distance),
                size=$method.methods_conditional.size)

        ## plot old spectrum with picked isotopes as green dots
        par(mfrow = c(2, 2), oma=c(0,0,2,0))
        for (random_sample in random_spectra){
            plot(picked_peaks[[random_sample]], sub="", main=paste0("spectrum ", pixelnames[random_sample]))
            points(peaks[[random_sample]], col="green", pch=20)
        }
        ## one common title per page, set after the four panels are drawn
        title(paste0("Monoisotopic peaks in green"), outer=TRUE, line=0)

        ## plot the monoisotopic peaks alone
        par(mfrow = c(2, 2), oma=c(0,0,2,0))
        for (random_sample in random_spectra){
            plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", pixelnames[random_sample]))
        }
        title("Monoisotopic peaks", outer=TRUE, line=0)

        ## QC numbers after this step, appended as new column to the QC table
        minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
        maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
        mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
        medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
        number_features = length(unique(unlist(lapply(peaks,mass))))
        number_spectra = length(peaks)
        monoisotopes = c(minmz, maxmz,number_features,mean_features,  medint, number_spectra)
        QC_numbers= cbind(QC_numbers, monoisotopes)
        vectorofactions = append(vectorofactions, "monoisotopes")

        ## write the intensity matrix only when at least one spectrum still contains peaks
        if (length(peaks[!sapply(peaks, isEmpty)])>0){
            ## the matrix is computed exactly once; for profile imzML input the spectra are
            ## passed as second argument, which replaced the plain matrix in any case
            #if $infile.ext == 'imzml' and str($centroids) == "FALSE"
                featureMatrix <- intensityMatrix(peaks, maldi_data)
            #else
                featureMatrix <- intensityMatrix(peaks)
            #end if
            ## output layout: first row spectrum names (labelled "mz"), then one row per feature
            featureMatrix2 =cbind(pixelnames, featureMatrix)
            colnames(featureMatrix2)[1] = c("mz")
            featureMatrix2 = t(featureMatrix2)
            write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
        }else{print("There are no spectra with peaks left")}




    #elif str( $method.methods_conditional.method ) == 'Align':

        print('align')
        ## Align spectra in two steps: 1) estimate warping functions, 2) apply them to the peaks


        #if str($method.methods_conditional.reference_for_alignment.align_ref) == 'no_reference':
            ## no reference given: determineWarpingFunctions is called without the reference argument
            ## 1) calculate warping:
                warping_function <- determineWarpingFunctions(peaks,
                                      tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method",
                                      allowNoMatches=$method.methods_conditional.allow_nomatch, minFrequency = $method.methods_conditional.reference_for_alignment.min_frequency)
            ## 2) warp spectra:
                peaks = warpMassPeaks(peaks, warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch)


        #elif str($method.methods_conditional.reference_for_alignment.align_ref) == 'yes_reference':

            ## create reference mass_vector from tabular file
            mass_vector = read.delim("$method.methods_conditional.reference_for_alignment.reference_file", header = $method.methods_conditional.reference_for_alignment.reference_header, stringsAsFactors = FALSE)[,$method.methods_conditional.reference_for_alignment.mz_column]
            ## the reference peaks only need m/z values, every intensity is set to 1
            int_vector = rep(1,length(mass_vector))
            mass_list = createMassPeaks(mass_vector, int_vector)

            #if str($method.methods_conditional.reference_for_alignment.separate_alignment) == "FALSE"
            print('default alignment')

                ## 1) calculate warping:
                    warping_function <- determineWarpingFunctions(peaks,
                                          tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method",
                                          allowNoMatches=$method.methods_conditional.allow_nomatch, reference = mass_list)
                ## 2) warp spectra:
                    peaks = warpMassPeaks(peaks, warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch)

            #elif str($method.methods_conditional.reference_for_alignment.separate_alignment) == "TRUE"
            print('spectra wise alignment')

                ## every spectrum is aligned on its own and stored at its original position;
                ## before, the result list was overwritten in every iteration, so that only the
                ## last spectrum was left after the loop
                peaks_new_list = vector("list", length(peaks))

                for (pixelnb in seq_along(peaks))
                {
                    ## both MALDIquant functions are documented to take a list of MassPeaks
                    ## objects, therefore a list of length one is passed instead of the bare object
                    single_spectrum = peaks[pixelnb]

                    ## 1) calculate warping:
                        warping_function <- determineWarpingFunctions(single_spectrum,
                                              tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method",
                                              allowNoMatches=$method.methods_conditional.allow_nomatch, reference = mass_list)
                    ## 2) warp spectra:
                        peaks_new_list[[pixelnb]] = warpMassPeaks(single_spectrum, warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch)[[1]]
                }
                peaks = peaks_new_list

            #end if

        #end if

        #if $method.methods_conditional.remove_empty:
            print(paste(length(findEmptyMassObjects(peaks)), " empty spectra were removed", sep=" "))

            ## only if there are empty spectra to remove
            if (length(findEmptyMassObjects(peaks))>0)
            {
                peaks = removeEmptyMassObjects(peaks)
                ## rebuild the names from the coordinates of the remaining spectra
                pixelnames = paste("xy", coordinates(peaks)[,1],coordinates(peaks)[,2], sep="_")
            }
        #end if

        ## QC plot and numbers
        par(mfrow = c(2, 2), oma=c(0,0,2,0))

        for (random_sample in random_spectra){

            ## a spectrum that cannot be plotted is replaced by an empty panel
            tryCatch(
                        {
                            plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", pixelnames[random_sample]))
                        },error=function(cond) 
                        {
                            plot(NULL, xlim=c(0,0), ylim=c(0,0), ylab="intensity", xlab="m/z")
                        }
                    )
        }

        title("Aligned spectra", outer=TRUE, line=0)
        ## QC numbers after alignment, appended as new column to the QC table
        minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
        maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
        mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
        medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
        number_features = length(unique(unlist(lapply(peaks,mass))))
        number_spectra = length(peaks)
        aligned = c(minmz, maxmz,number_features,mean_features,  medint, number_spectra)
        QC_numbers= cbind(QC_numbers, aligned)
        vectorofactions = append(vectorofactions, "aligned")

        ## write the intensity matrix only when at least one spectrum still contains peaks
        if (length(peaks[!sapply(peaks, isEmpty)])>0){
            ## the matrix is computed exactly once; for profile imzML input the spectra are
            ## passed as second argument, which replaced the plain matrix in any case
            #if $infile.ext == 'imzml' and str($centroids) == "FALSE"
                featureMatrix <- intensityMatrix(peaks, maldi_data)
            #else
                featureMatrix <- intensityMatrix(peaks)
            #end if

            ## output layout: first row spectrum names (labelled "mz"), then one row per feature
            featureMatrix2 =cbind(pixelnames, featureMatrix)
            colnames(featureMatrix2)[1] = c("mz")
            featureMatrix2 = t(featureMatrix2)
            write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
        }else{print("There are no spectra with peaks left")}



    #elif str( $method.methods_conditional.method ) == 'Binning':


        print('binning')
        ## m/z binning of the peaks with the tolerance and method chosen by the user

        peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance, method="$method.methods_conditional.bin_method")

        ## QC plot and numbers
        par(mfrow = c(2, 2), oma=c(0,0,2,0))

        for (random_sample in random_spectra){

            ## a spectrum that cannot be plotted is replaced by an empty panel
            tryCatch(
                        {
                            plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", pixelnames[random_sample]))
                        },error=function(cond) 
                        {
                            plot(NULL, xlim=c(0,0), ylim=c(0,0), ylab="intensity", xlab="m/z")
                        }
                    )
        }

        title("Binned spectra", outer=TRUE, line=0)
        ## QC numbers after binning, appended as new column to the QC table
        minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
        maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
        mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
        medint =round( median(unlist(lapply(peaks,intensity))), digits=2)
        number_features = length(unique(unlist(lapply(peaks,mass))))
        number_spectra = length(peaks)
        binned = c(minmz, maxmz,number_features,mean_features,  medint, number_spectra)
        QC_numbers= cbind(QC_numbers, binned)
        vectorofactions = append(vectorofactions, "binned")

        ## write the intensity matrix only when at least one spectrum still contains peaks
        if (length(peaks[!sapply(peaks, isEmpty)])>0){
            ## the matrix is computed exactly once; for profile imzML input the spectra are
            ## passed as second argument, in every other case the peaks alone are used
            #if $infile.ext == 'imzml' and str($centroids) == "FALSE"
                featureMatrix <- intensityMatrix(peaks, maldi_data)
            #else
                featureMatrix <- intensityMatrix(peaks)
            #end if
            ## output layout: first row spectrum names (labelled "mz"), then one row per feature
            featureMatrix2 =cbind(pixelnames, featureMatrix)
            colnames(featureMatrix2)[1] = c("mz")
            featureMatrix2 = t(featureMatrix2)
            write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
        }else{print("There are no spectra with peaks left")}



    #elif str( $method.methods_conditional.method ) == 'Filtering':

        print('filtering')
        ## m/z filtering
        ## The argument list of filterPeaks is assembled on the template level:
        ## minFrequency and/or minNumber depending on the chosen filter type, and the
        ## annotation groups when group wise filtering was requested

        peaks <- filterPeaks(peaks
        #if str( $method.methods_conditional.filter_type.filter_params ) != 'min_Number':
            ,minFrequency=$method.methods_conditional.filter_type.minFrequency
        #end if
        #if str( $method.methods_conditional.filter_type.filter_params ) != 'min_Frequency':
            ,minNumber=$method.methods_conditional.filter_type.minNumber
        #end if
        ## in case of group wise filtering set grouping conditions
        ## (full argument name "labels" instead of relying on partial matching of "label")
        #if str( $method.methods_conditional.filter_groups.filter_annot_groups ) == 'yes_grouping':
            ,mergeWhitelists=$method.methods_conditional.filter_groups.mergeWhitelists, labels = samples
        #end if
        ) ## finishes filterPeaks function


        ## QC plot and numbers
        par(mfrow = c(2, 2), oma=c(0,0,2,0))

        for (random_sample in random_spectra){

            ## a spectrum that cannot be plotted is replaced by an empty panel
            tryCatch(
                        {
                            plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", pixelnames[random_sample]))
                        },error=function(cond)
                        {
                            plot(NULL, xlim=c(0,0), ylim=c(0,0), ylab="intensity", xlab="m/z")
                        }
                    )
        }

        ## QC numbers for filtered file, appended as new column to the QC table
        title("Filtered spectra", outer=TRUE, line=0)
        minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
        maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
        mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
        medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
        number_features = length(unique(unlist(lapply(peaks,mass))))
        number_spectra = length(peaks)
        filtered = c(minmz, maxmz,number_features,mean_features,  medint, number_spectra)
        QC_numbers= cbind(QC_numbers, filtered)
        vectorofactions = append(vectorofactions, "filtered")

        if (length(peaks[!sapply(peaks, isEmpty)])>0){
            ## the matrix is computed exactly once; for profile imzML input the spectra are
            ## passed as second argument, which replaced the plain matrix in any case
            #if $infile.ext == 'imzml' and str($centroids) == "FALSE"
                featureMatrix <- intensityMatrix(peaks, maldi_data)
            #else
                featureMatrix <- intensityMatrix(peaks)
            #end if
            ## output layout: first row spectrum names (labelled "mz"), then one row per feature
            featureMatrix2 =cbind(pixelnames, featureMatrix)
            colnames(featureMatrix2)[1] = c("mz")
            featureMatrix2 = t(featureMatrix2)
        }else{
            print("There are no spectra with peaks left")
            ## placeholder so that the intensity matrix output is written in this case as well
            featureMatrix2 = matrix(0, ncol=1, nrow=1)
        }
        ## written in both cases, unlike the other steps that skip the file when no peaks are left
        write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
            
            
    #elif str( $method.methods_conditional.method ) == 'skip_preprocessing': 
        ##for now as option to filter large files

    #end if

#end for

if (length(peaks[!sapply(peaks, isEmpty)])>0){
    ## mass peaks output: one row per detected peak holding snr, mass, intensity
    ## and the name of the spectrum (taken from pixelnames) the peak belongs to
    ##
    ## One data.frame is built per spectrum and all of them are bound in a single
    ## rbind call. Growing the table with rbind inside a loop recopies every
    ## accumulated row on each iteration, which becomes very slow for many spectra.
    ## Spectra without peaks yield zero-row data.frames, which rbind ignores.
    spectrum_tables = lapply(seq_along(peaks), function(spectrum){
        data.frame(snr = peaks[[spectrum]]@snr,
                   mass = peaks[[spectrum]]@mass,
                   intensity = peaks[[spectrum]]@intensity,
                   spectrum = rep(pixelnames[[spectrum]], length(peaks[[spectrum]]@mass)),
                   stringsAsFactors = FALSE)
    })
    mass_peaks = do.call(rbind, spectrum_tables)
    write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
}else{print("There are no spectra with peaks left")}

## print table with QC values
## Row labels follow the order in which the QC vectors are assembled, e.g. for the
## "filtered" column: min m/z, max m/z, number of unique features, mean number of
## peaks per spectrum, median intensity, number of spectra.
## The fourth value is computed as (total number of peaks / number of spectra),
## i.e. a mean, so it is labelled "mean" rather than "median".
rownames(QC_numbers) = c("min m/z", "max mz", "# features", "mean \n# peaks (int.>0)", "median\nintensity", "# spectra")
## empty plot so that the table is drawn on a fresh page
plot(0,type='n',axes=FALSE,ann=FALSE)
grid.table(t(QC_numbers))

## close the graphics device to finish writing the plots
dev.off()


## imzML export only runs when summarized_spectra is FALSE
if (summarized_spectra == FALSE){
    #if $infile.ext == 'imzml'
        MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=TRUE)
    #elif $infile.ext == 'tabular'
        ## tabular input carries no coordinates; rebuild them from the pixel names,
        ## which are split at "_" into three parts (prefix, x value, y value)
        name_parts = strsplit(as.character(pixelnames), "\\_")
        name_matrix = matrix(unlist(name_parts), ncol=3, byrow=TRUE)
        ## second and third part are the x and y values; keep every coordinate pair once
        xy_coordinates = unique(cbind(as.numeric(name_matrix[,2]), as.numeric(name_matrix[,3])))
        MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=TRUE, coordinates=xy_coordinates)
    #end if

}

    ]]>
        </configfile>
    </configfiles>
    <inputs>
        <param name="infile" type="data" format="imzml,tabular" label="MSI data" help="Input file as imzML (composite upload) or tabular peaklist (regular upload)"/>
        <param name="centroids" type="boolean" label="Centroided input" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/>
        <conditional name="restriction_conditional">
            <param name="restriction" type="select" label="Use only spectra of interest" help="This option only works for imzML files">
                <option value="no_restriction" selected="True">No, use all spectra</option>
                <option value="restrict">Yes, restrict to spectra of interest</option>
            </param>
            <when value="restrict">
                <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates"/>
                <param name="column_x" data_ref="coordinates_file" label="Column with x values" type="data_column"/>
                <param name="column_y" data_ref="coordinates_file" label="Column with y values" type="data_column"/>
                <param name="coordinates_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
            </when>
            <when value="no_restriction"/>
        </conditional>

        <conditional name="tabular_annotation">
            <param name="load_annotation" type="select" label="Spectra annotations" help="Annotations can be used for group wise peak detection or filtering">
                <option value="no_annotation" selected="True">No</option>
                <option value="yes_annotation">Yes</option>
            </param>
            <when value="yes_annotation">
                <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file"
                    help="Tabular file with three columns: x values, y values and pixel annotations"/>
                <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/>
                <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/>
                <param name="column_names" data_ref="annotation_file" label="Column with pixel annotations" type="data_column"/>
                <param name="tabular_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
            </when>
            <when value="no_annotation"/>
        </conditional>
        <repeat name="methods" title="Method" min="1">
            <conditional name="methods_conditional">
                <param name="method" type="select" label="Select a method">
                    <option value="Peak_detection">Peak detection</option>
                    <option value="monoisotopic_peaks">Monoisotopic peaks</option>
                    <option value="Align">Align Spectra (warping/phase correction)</option>
                    <option value="Binning">Binning</option>
                    <option value="Filtering">Filtering</option>
                    <option value="skip_preprocessing">No method</option>
                </param>
                <when value="Peak_detection">
                    <param name="peak_method" type="select" label="Noise estimation function">
                        <option value="MAD" selected="True">MAD</option>
                        <option value="SuperSmoother">SuperSmoother</option>
                    </param>
                    <param name="halfWindowSize" type="integer" value="20"
                        label="Half window size (number of data points)"
                        help="The resulting window reaches from 
                            mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]
                            (window size is 2*halfWindowSize+1).
                            The best size differs depending on the selected smoothing method."/>
                    <param name="snr" type="integer" value="2" label="Signal-to-noise-ratio" help=""/>
                    <param name="use_annotations" type="boolean" label="Detect peaks on average mass spectra" help="Spectra with same annotation are averaged before peak detection, no imzML output" truevalue="TRUE" falsevalue="FALSE"/>
                </when>
                <when value="monoisotopic_peaks">
                    <param name="minCor" type="float" value="0.95" label="Minimal correlation"
                        help="Minimal correlation between the peak pattern generated by the model and the experimental peaks in the MassPeaks object to be recognized as isotopic pattern."/>
                    <param name="tolerance" type="float" label="Tolerance" value="0.0001"
                        help="Maximal relative deviation of a peak position (m/z) to be considered as identical: abs(((mass[i]+distance)-mass[i+1])/mass[i]) smaller than 'tolerance'. For 100ppm use 0.0001" />
                    <param name="distance" type="text" label="Distance" value="1.00235" help="Distance between two consecutive peaks in an isotopic pattern. 1.00235 is average distance for polypeptides. Multiple values can be used to find multiple charged pattern e.g. 1, 0.5 ,0.33">
                        <sanitizer invalid_char="">
                            <valid initial="string.digits">
                                <add value="," />
                                <add value=":" />
                                <add value="." />
                            </valid>
                        </sanitizer>
                    </param>
                    <param name="size" type="text" label="Size" value="3:10" help="Size (length) of isotopic pattern, longer patterns are preferred over shorter ones, min size is 2, a range can be used.">
                        <sanitizer invalid_char="">
                            <valid initial="string.digits">
                                <add value=":" />
                            </valid>
                        </sanitizer>
                    </param>
                </when>

                <when value="Align">
                    <param name="warping_method" type="select" label="Warping methods">
                        <option value="lowess" selected="True">Lowess</option>
                        <option value="linear">Linear</option>
                        <option value="quadratic">Quadratic</option>
                        <option value="cubic">Cubic</option>
                    </param>
                    <param name="tolerance" type="float" value="0.00005"
                        label="Tolerance = abs(mz1 - mz2)/mz2"
                        help="Maximal relative deviation of a peak position (m/z) to be considered as identical. For 50ppm use 0.00005 or 5e-5" />
                    <param name="allow_nomatch" type="boolean" label="Don't throw an error when less than 2 reference m/z were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/>
                    <param name="empty_nomatch" type="boolean" label="If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/>
                    <param name="remove_empty" type="boolean" label="Should empty spectra be removed" truevalue="TRUE" falsevalue="FALSE"/>

                    <conditional name="reference_for_alignment">
                        <param name="align_ref" type="select" label="Reference" help="If given, samples will be aligned to reference, use internal calibrants to perform m/z calibration">
                            <option value="no_reference" selected="True">no reference</option>
                            <option value="yes_reference">reference from tabular file</option>
                        </param>
                        <when value="no_reference">
                    <param name="min_frequency" type="float" value="0.9" label = "minFrequency" help="Removal of all peaks which occur in less than minFrequency spectra to generate the reference m/z"/>
                        </when>
                        <when value="yes_reference">
                            <param name="reference_file" type="data" format="tabular"
                                label="Reference m/z values"
                                help="Tabular file"/>
                            <param name="mz_column" data_ref="reference_file" label="Column with m/z values" type="data_column"/>
                            <param name="reference_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
                            <param name="separate_alignment" type="boolean" label="Spectrum wise alignment" help="Internal binning is omitted to avoid interaction between spectra" truevalue="TRUE" falsevalue="FALSE"/>
                        </when>
                    </conditional>
                </when>

                <when value="Binning">
                    <param name="bin_tolerance" type="float" value="0.002" label="Tolerance"
                        help="After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical."/>
                    <param name="bin_method" display="radio" type="select" label="Bin creation rule" help="strict - creates bins never containing two or more peaks of the same sample. relaxed - allows multiple peaks of the same sample in one bin.">
                    <option value="strict" selected="True" >strict</option>
                    <option value="relaxed" >relaxed</option>
                </param>
                </when>
                <when value="Filtering">
                <conditional name="filter_groups">
                        <param name="filter_annot_groups" type="select" label="m/z filtering parameters are applied to all spectra or groups of spectra" help="By default a single group is assumed and filtering will be done based on all pixels. To filter groups of spectra, an annotation file has to be specified above">
                            <option value="no_grouping" selected="True">use single group</option>
                            <option value="yes_grouping">use spectra groups from annotation file</option>
                        </param>
                        <when value="no_grouping"/>
                        <when value="yes_grouping">
                			<param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE"
                         label="mergeWhitelists" help="Yes means that peaks that survive the filtering in one annotation group are also kept in other groups regardless if the filtering criteria are met in these groups"/>
            			</when>
				</conditional>
				<conditional name="filter_type">
                    <param name="filter_params" type="select" label="m/z filtering parameters" help="minFrequency: Removal of all peaks which occur in less than minFrequency spectra (relative threshold). minNumber: Removal of all peaks which occur in less than minNumber spectra (absolute threshold). If both are set the higher value from relative and absolute threshold is taken.">
                        <option value="min_Frequency" selected="True">minFrequency</option>
                        <option value="min_Number">minNumber</option>
                        <option value="both">both</option>
                    </param>
                    <when value="min_Frequency">
                    	<param name="minFrequency" type="float" value="0.25" label="Removal of all peaks which occur in less than minFrequency spectra" />
                    </when>
                    <when value="min_Number">
                        <param name="minNumber" type="float" value="1.0" label="Removal of all peaks which occur in less than minNumber spectra" />
                    </when>
                    <when value="both">
                    	<param name="minFrequency" type="float" value="0.25" label="Removal of all peaks which occur in less than minFrequency spectra" />
                    	<param name="minNumber" type="float" value="1.0" label="Removal of all peaks which occur in less than minNumber spectra" />
                    </when>
					</conditional>            
                </when>
                <when value="skip_preprocessing"/>
            </conditional>
        </repeat>
    </inputs>
    <outputs>
        <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzml">
            <!--filter>methods_conditional['method'] == 'Peak_detection'</filter-->
        </data>
        <data format="pdf" name="plots" from_work_dir="peaks_qc_plot.pdf" label = "${tool.name} on ${on_string}: QC"/>
        <data format="tabular" name="masspeaks" label="${tool.name} on ${on_string}: peaklist"/>
        <data format="tabular" name="intensity_matrix" label="${tool.name} on ${on_string}: intensity_matrix"/>
    </outputs>
    <tests>
        <test>
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML"/>
                <composite_data value="Example_Continuous.ibd"/>
            </param>
                <conditional name="tabular_annotation">
                    <param name="load_annotation" value="yes_annotation"/>
                    <param name="annotation_file" value="pixel_annotations.tabular"/>
                    <param name="column_x" value="1"/>
                    <param name="column_y" value="2"/>
                    <param name="column_names" value="3"/>
                    <param name="tabular_header" value="TRUE"/>
                </conditional>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="method" value="Peak_detection"/>
                    <param name="peak_method" value="SuperSmoother"/>
                    <param name="halfWindowSize" value="1"/>
                    <param name="snr" value="5"/>
                    <param name="use_annotations" value="TRUE"/>
                </conditional>
            </repeat>
            <output name="plots" file="peakdetection1_QC.pdf" compare="sim_size"/>
            <output name="masspeaks" file="masspeaks1.tabular"/>
            <output name="intensity_matrix" file="int1.tabular"/>
        </test>
        <test>
            <param name="infile" value="masspeaks3_forinput.tabular"/>
            <param name="centroids" value="TRUE"/>
             <repeat name="methods">
                <conditional name="methods_conditional">
                <param name="method" value="monoisotopic_peaks"/>
                <param name="tolerance" value="0.0004"/>
                <param name="size" value="3"/>
            	</conditional>
            </repeat>
            <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/>
            <output name="masspeaks" file="masspeaks2.tabular"/>
            <output name="intensity_matrix" file="int2.tabular"/>
            <output name="outfile_imzml" ftype="imzml" file="peak_detection2.imzml.txt" lines_diff="4">
                <extra_files type="file" file="peak_detection2.imzml" name="imzml" lines_diff="6"/>
                <extra_files type="file" file="peak_detection2.ibd" name="ibd" compare="sim_size"/>
            </output>
        </test>
         <test>
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML"/>
                <composite_data value="Example_Continuous.ibd"/>
            </param>
                <conditional name="tabular_annotation">
                    <param name="load_annotation" value="yes_annotation"/>
                    <param name="annotation_file" value="pixel_annotations.tabular"/>
                    <param name="column_x" value="1"/>
                    <param name="column_y" value="2"/>
                    <param name="column_names" value="3"/>
                    <param name="tabular_header" value="TRUE"/>
                </conditional>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="method" value="Peak_detection"/>
                    <param name="peak_method" value="MAD"/>
                    <param name="halfWindowSize" value="10"/>
                    <param name="snr" value="2"/>
                </conditional>
            </repeat>
             <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="method" value="Filtering"/>
                    <conditional name="filter_groups">
                        <param name="filter_annot_groups" value="yes_grouping"/>
                        <param name="mergeWhitelists" value="FALSE"/>
                    </conditional>
                    <conditional name="filter_type">
                        <param name="filter_params" value="min_Frequency"/>
                        <param name="minFrequency" value="0.25"/>
                    </conditional>
                </conditional>
            </repeat>
             <output name="plots" file="peakdetection3_QC.pdf" compare="sim_size"/>
            <output name="intensity_matrix" file="int3.tabular"/>
            <output name="masspeaks" file="masspeaks3.tabular"/>
            <output name="outfile_imzml" ftype="imzml" file="peak_detection3.imzml.txt" lines_diff="4">
                <extra_files type="file" file="peak_detection3.imzml" name="imzml" lines_diff="6"/>
                <extra_files type="file" file="peak_detection3.ibd" name="ibd" compare="sim_size"/>
            </output>
        </test>
        <test>
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Processed.imzML"/>
                <composite_data value="Example_Processed.ibd"/>
            </param>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="method" value="Peak_detection"/>
                    <param name="peak_method" value="SuperSmoother"/>
                    <param name="halfWindowSize" value="5"/>
                    <param name="snr" value="3"/>
                </conditional>
            </repeat>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="method" value="Binning"/>
                    <param name="bin_tolerance" value="0.01"/>
                </conditional>
            </repeat>
            <output name="plots" file="peakdetection4_QC.pdf" compare="sim_size"/>
            <output name="intensity_matrix" file="int4.tabular"/>
            <output name="masspeaks" file="masspeaks4.tabular"/>
            <output name="outfile_imzml" ftype="imzml" file="peak_detection4.imzml.txt" lines_diff="4">
                <extra_files type="file" file="peak_detection4.imzml" name="imzml" lines_diff="6"/>
                <extra_files type="file" file="peak_detection4.ibd" name="ibd" compare="sim_size"/>
            </output>
        </test>
        <test>
            <param name="infile" value="" ftype="imzml">
                <composite_data value="preprocessing_results3.imzML"/>
                <composite_data value="preprocessing_results3.ibd"/>
            </param>
            <param name="centroids" value="TRUE"/>
            <conditional name="restriction_conditional">
		        <param name="restriction" value="restrict"/>
                <param name="coordinates_file" value="annotations.tabular"/>
                <param name="column_x" value="1"/>
                <param name="column_y" value="2"/>
                <param name="coordinates_header" value="TRUE"/>
            </conditional>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="method" value="skip_preprocessing"/>
                </conditional>
            </repeat>
            <output name="plots" file="peakdetection5_QC.pdf" compare="sim_size"/>
            <output name="masspeaks" file="masspeaks5.tabular"/>
            <output name="outfile_imzml" ftype="imzml" file="peak_detection5.imzml.txt" lines_diff="4">
                <extra_files type="file" file="peak_detection5.imzml" name="imzml" lines_diff="6"/>
                <extra_files type="file" file="peak_detection5.ibd" name="ibd" compare="sim_size"/>
            </output>
        </test>
    </tests>
    
    <help>
        <![CDATA[

@MADLI_QUANT_DESCRIPTION@

-----

**Input data**

- MSI data: 2 types of input data can be used:

    - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/imzml/>`_
    - MSI data as peak list (tabular file) with the columns named "snr", "mass", "intensity" and "spectrum". The spectrum has to be in the following format: xy_1_1 (for pixel coordinates x1y1). The header must have exactly the four column names.

        ::    
        
            snr          mass      intensity   spectrum
            5.34	304.16     0.10         xy_1_1
            12.09	305        0.2          xy_1_1
            6.80	306.25     0.133        xy_1_1
            ...
            ...    


- Optional:  Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. Tabular files with any header name or no header at all are supported. 

    ::

       x_coord     y_coord 
         1            1    
         2            1    
         3            1    
        ...
        ...


- Optional: Tabular file(s) containing pixel coordinates and annotation. X and y values in separate columns and the corresponding annotation in a third column. Tabular files with any header name or no header at all are supported. The annotations can be used to either detect peaks on the average mass spectrum of each annotation group ('Detect peaks on average mass spectra') or to filter for peaks within pixel groups ('Group wise filtering'). This option is not compatible with tabular peak list inputs.

    ::

     x_coord     y_coord    annotation
        1            1        healthy
        2            1        healthy
        3            1        disease
       ...
       ...

**Options**

- Peak detection: detection of peaks, only possible with profile mode input. A peak is a local maximum above a user defined noise threshold.

    - Two functions exist to estimate the noise: MAD and supersmoother. 
    - SNR is an abbreviation for signal-to-noise-ratio. A local maximum has to be higher than SNR*noise to be recognized as a peak.
    - 'halfWindowSize': The resulting window reaches from mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]. A local maximum has to be the highest one in the given window to be recognized as peak.
    - Detect peaks on average mass spectra: Spectra with the same annotation (taken from the annotation tabular input) are averaged and peak picking is performed on the average spectrum of each annotation group. The exported imzML is empty and cannot be used for further analysis steps. The peaklist and intensity matrix outputs contain the annotation group names with their averaged intensity values. Filtering steps have to be done in the same run as the peak picking.


- Monoisotopic peaks: Keeps only the monoisotopic peaks

    - Based on a Poisson model for isotopic patterns as described in (`Breen et al. <https://doi.org/10.1002/1522-2683(20000601)21:11%3C2243::AID-ELPS2243%3E3.0.CO;2-K>`_)
    - Isotopic pattern can be characterized and recognized by

        - the similarity of the experimental pattern with the modelled pattern
        - the distance between consecutive isotopic peaks. For polypeptides the average distance is 1.00235 (`Park et al. <https://pubs.acs.org/doi/abs/10.1021/ac800913b>`_). Multiply charged analytes have smaller distances between the peaks (e.g. z = 1: distance = ~1; z = 2: distance = ~0.5; z = 3: distance = ~0.3333). To search for differently charged isotopic patterns, multiple distances can be applied - the order matters because the first distance that matches is reported (1, 0.5, 0.3333).
        - the size (length) of the pattern, multiple values can be applied, longer patterns are preferred over shorter ones.


- Spectra alignment (warping): alignment for (re)calibration of m/z values. 

    - without external reference m/z: internal reference is obtained by filtering (default 90%) and binning the peaks to find landmark peaks and their average m/z
    - with external reference m/z: the m/z provided in a tabular file are used as a reference, at least 10 reference values are recommended
    - non linear warping (parametric time warping plus binning) to match the reference peaks (internal or external) to the peaks with the given tolerance. At least two m/z per spectrum are needed for the alignment. This requirement can be skipped by setting "Don't throw an error when less than 2 reference m/z were found in a spectrum" to yes. If the intensities of spectra that could not be aligned should be set to zero, select yes in "If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero". In order to remove such empty spectra set "Should empty spectra be removed" to yes.


- Peak binning: After the alignment the peak positions (m/z) are very similar but not identical. The binning is needed to make similar peak m/z values identical. The algorithm is based on the following workflow:

    1. Put all m/z in a sorted vector.
    2. Calculate differences between each neighbor.
    3. Divide the m/z vector at the largest gap (largest difference) and form a left and a right bin.
    4. Rerun step 3 for the left and/or the right bin if they don't fulfill the following criteria:

    - All peaks in a bin are near to the mean (abs(mass-meanMass)/meanMass < tolerance).
    - method == "strict": The bin doesn't contain two or more peaks of the same sample.


- Peak filtering: Removal of less frequent m/z features:

    - minFrequency : between 0 and 1: m/z has to occur in 0 - 100% of all spectra; minNumber: m/z has to occur in at least this amount of spectra --> out of those two criteria the stricter value will be used
    - Group wise filtering with pixel annotations: 'Yes' means that the filtering criteria are applied for each annotation group separately. 
    - mergeWhitelists: 'Yes' means that peaks that survive the filtering in one annotation group are also kept in other groups regardless if the filtering criteria are met in these groups
    - To filter data that was averaged before peak detection: Filtering has to be done in the same tool run as the peak detection. The filtering criteria are automatically applied per annotation group (Group wise filtering can be 'No') and not per pixel. Example: to keep only m/z that were detected in at least half of all annotation groups set minFrequency to 0.5.


**Output**

- centroided, processed imzML file, imzML file is empty when 'Detect peaks on average mass spectra' is chosen.
- pdf with mass spectra plots of four random spectra and a table with key values after each preprocessing step
- peak list (tabular file) with the columns "snr", "mass", "intensity" and "spectrum"
- tabular file with intensity matrix (m/z in rows and spectra in columns). If the input file was imzML in profile mode the intensities before peak picking are also stored in the matrix. For all other inputs, values that were not picked are set to NA. For peak picking on the average of multiple spectra, each spectrum group is a column with mean intensities for each m/z.

.. _MALDIquant: http://strimmerlab.org/software/maldiquant/

        ]]>
    </help>
    <expand macro="citation"/>
</tool>