view quality_report.xml @ 10:f365bad862c9 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 6e8b69ee3aff3c93f745a5de11cc9169130f2e5e"
author galaxyp
date Thu, 24 Sep 2020 11:44:48 +0000
parents 0d4d4f16d455
children f396c176f366
line wrap: on
line source

<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.0">
    <description>
        mass spectrometry imaging QC
    </description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements">
        <requirement type="package" version="2.3">r-gridextra</requirement>
        <requirement type="package" version="3.3.2">r-ggplot2</requirement>
        <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement>
        <requirement type="package" version="2.23_17">r-kernsmooth</requirement>
        <requirement type="package" version="1.1.1">r-scales</requirement>
        <requirement type="package" version="1.0.12">r-pheatmap</requirement>
    </expand>
    <command detect_errors="exit_code">
    <![CDATA[
        @INPUT_LINKING@
        cat '${cardinal_qualitycontrol_script}' &&
        Rscript '${cardinal_qualitycontrol_script}'
    ]]>
    </command>
    <configfiles>
        <configfile name="cardinal_qualitycontrol_script"><![CDATA[

################################# load libraries and read file #################

library(Cardinal)
library(ggplot2)
library(RColorBrewer)
library(gridExtra)
library(KernSmooth)
library(scales)
library(pheatmap)

@READING_MSIDATA@

## Legacy RData inputs may still contain an MSImageSet object; convert it to
## MSImagingExperiment so that the rest of the script only handles one class.
## is() is used instead of comparing class() with "==", because class() may return
## more than one element and the comparison would then no longer be a single TRUE/FALSE.
if (is(msidata, "MSImageSet")){
    msidata = as(msidata, "MSImagingExperiment")
    ## all pixels are assigned to one run; later plots select it with run="infile"
    run(msidata) = "infile"
}

## remove duplicated coordinates (the first pixel of every coordinate is kept)
msidata <- msidata[,!duplicated(coord(msidata))]

## optional annotation from tabular file to obtain pixel groups (otherwise all pixels are considered to be one sample)

#if str($tabular_annotation.load_annotation) == 'yes_annotation':

    ## read and extract x,y,annotation information
    ## (header flag and column indices are inserted by the template before R runs)
    input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
    annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
    annotation_name = colnames(annotation_input)[3] ##extract header for annotations to later export tabular with same name
    colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation"

    ## merge with coordinate information of msidata
    ## pixel_index stores the position of every pixel in msidata, because merge() reorders the rows
    msidata_coordinates = data.frame(coord(msidata)\$x, coord(msidata)\$y, c(1:ncol(msidata)))
    colnames(msidata_coordinates) = c("x", "y", "pixel_index")
    ## all.x=TRUE keeps pixels without an entry in the annotation file (their annotation is NA)
    merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE)
    ## missing values become the string "NA" and therefore form their own factor level
    merged_annotation[is.na(merged_annotation)] = "NA"
    ## restore the pixel order of msidata before the annotation is attached
    merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
    ## column 4 is the annotation column (x, y, pixel_index, annotation)
    msidata\$annotation = as.factor(merged_annotation[,4])

#end if

###################### calculation of data properties ################################
@DATA_PROPERTIES_INRAM@

## Median of all intensities
medint = round(median(spectra(msidata), na.rm=TRUE), digits=2)
## Number of spectra multiplied with number of m/z (potential number of peaks).
## The pixel count is converted to double first: the product of two integers
## overflows to NA (with a warning) for large files.
numpeaks = as.numeric(ncol(msidata))*nrow(msidata)
## Percentage of intensities > 0
## (npeaks is not defined in this part of the script; presumably it comes from the properties macro above)
percpeaks = round(npeaks/numpeaks*100, digits=2)
## TIC per spectrum and number of empty spectra; NA intensities are ignored,
## in the same way as for the intensity sums per m/z further below
TICs = pixelApply(msidata, sum, na.rm=TRUE)
NumemptyTIC = sum(TICs == 0)
## Median and sd of the TIC
medTIC = round(median(TICs), digits=1)
sdTIC = round(sd(TICs), digits=0)
## Median and sd of the number of peaks per spectrum (every intensity > 0 counts as peak);
## the counts are computed once and used for both statistics
peaks_per_spectrum = colSums(spectra(msidata)>0, na.rm=TRUE)
medpeaks = round(median(peaks_per_spectrum, na.rm=TRUE), digits=0)
sdpeaks = round(sd(peaks_per_spectrum, na.rm=TRUE), digits=0)
rm(peaks_per_spectrum)
## Processing information
centroidedinfo = centroided(msidata)

############## Read and filter tabular file with m/z ###########################

### reading m/z input (calibrant) file:

#if $calibrant_file:

    ## read the tabular file and keep only the m/z column and the name column (in this order)
    calibrant_list = read.delim("$calibrant_file", header = $calibrant_header, na.strings=c(" ","","NA"), stringsAsFactors = FALSE)
    calibrant_list = calibrant_list[,c($mz_column, $name_column)]

    ### calculate how many input calibrant m/z are valid:
    ### valid means strictly inside the m/z range of the file. which() drops rows whose m/z is NA;
    ### a plain logical index would turn them into all-NA rows and break the m/z filters later on

    in_mz_range = which(calibrant_list[,1]>minmz & calibrant_list[,1]<maxmz)
    inputcalibrants = calibrant_list[in_mz_range,]
    number_calibrants_in = length(calibrant_list[,1])
    number_calibrants_valid = length(inputcalibrants[,1])

#else

    ## no file provided: empty table with the same two columns
    inputcalibrants = as.data.frame(matrix(, nrow = 0, ncol = 2))
    number_calibrants_in = 0
    number_calibrants_valid = 0

#end if

## rename input dataframe and extract m/z
colnames(inputcalibrants) = c("m/z", "name")
inputcalibrantmasses = inputcalibrants[,1]


######################################## PDF #############################################
##########################################################################################
##########################################################################################

## open the PDF device that receives the plots created below
pdf("qualitycontrol.pdf", fonts = "Times", pointsize = 12)
## empty plot without axes and annotation; it only provides a page for the title
plot(0,type='n',axes=FALSE,ann=FALSE)

## if no filename is given, name of file in Galaxy history is used

#if not $filename:
    #set $filename = $infile.display_name
#end if

## the file name is inserted by the template before R runs
title(main=paste("$filename"))

################# I) file properties in numbers ################################
################################################################################
print("properties in numbers")

## left column: labels of the properties calculated in this script
properties2 = c(
    "Median of intensities",
    "Intensities > 0",
    "Number of empty spectra",
    "Median TIC ± sd",
    "Median # peaks per spectrum ± sd",
    "Centroided",
    paste0("input m/z (#valid/#input) in \n", "$calibrant_file.display_name")
)

## right column: the corresponding values as text, same order as the labels
values2 = c(
    paste0(medint),
    paste0(percpeaks, " %"),
    paste0(NumemptyTIC),
    paste0(medTIC, " ± ", sdTIC),
    paste0(medpeaks, " ± ",sdpeaks),
    paste0(centroidedinfo),
    paste0(number_calibrants_valid, " / ", number_calibrants_in)
)

## two column table with the same header as the existing property_df
property_df2 = setNames(data.frame(properties2, values2), c("properties", "values"))

## append the new rows to the existing table and draw it without row names
property_df = rbind(property_df, property_df2)
grid.table(property_df, rows= NULL)


####################### II) x-y images #######################################
##############################################################################
print("x-y images")


## only do plots for file with intensity peaks

if (npeaks > 0){

    ## function for density plots:
    ## scatter plot of x2 against x1 in which every point is coloured by the local
    ## point density (blue = sparse, red = dense).
    ##   x1, x2            numeric vectors of equal length (x and y values)
    ##   ylim, xlim        axis limits; default is the range of the finite values
    ##   xlab, ylab, main  axis labels and title, passed on to plot()
    ## Points are drawn in order of increasing density, so dense regions stay visible.
    ## If the density estimation fails, all points are drawn in the colour of the
    ## highest density instead of aborting the whole report.
    plot_colorByDensity = function(x1,x2,
                                   ylim=range(x2, finite=TRUE),
                                   xlim=range(x1, finite=TRUE),
                                   xlab="",ylab="",main=""){
      df = data.frame(x1,x2)
      cols = colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256)
      ## densCols() returns a grey level per point; its red channel (0-255) + 1 is the colour index
      df\$dens = tryCatch(
          col2rgb(densCols(x1,x2, colramp=colorRampPalette(c("black", "white"))))[1,] + 1L,
          error=function(cond){
              print("density colours could not be calculated, points are drawn in a single colour")
              rep(length(cols), nrow(df))})
      df\$col = cols[df\$dens]
      plot(x2~x1, data=df[order(df\$dens),], ylim=ylim,xlim=xlim,pch=20,col=col,
           cex=1,xlab=xlab,ylab=ylab,las=1, main=main)}


    ################### 0) overview for combined data ###########################

    ### only for previously combined data, same plot as in combine QC pdf

    if (!is.null(levels(msidata\$annotation))){

        ## number of annotation groups (used again for the boxplot axis further below)
        number_combined = nlevels(msidata\$annotation)

        ## one row per pixel: position and annotation group
        position_df = data.frame(x = coord(msidata)\$x,
                                 y = coord(msidata)\$y,
                                 annotation = msidata\$annotation)

        ## tile image of the annotation groups, legend below the image
        combine_plot = ggplot(position_df, aes(x=x, y=y, fill=annotation)) +
            geom_tile() +
            coord_fixed() +
            ggtitle("Spatial orientation of pixel annotations") +
            theme_bw() +
            theme(text = element_text(family="ArialMT", face="bold", size=12),
                  legend.key.size = unit(0.2, "line"),
                  legend.text = element_text(size = 8),
                  legend.position = "bottom",
                  legend.direction = "vertical") +
            guides(fill=guide_legend(ncol=4,byrow=TRUE))

        print(combine_plot)

        ## highest pixel index of every annotation group; dashed lines are drawn at
        ## these positions in the plots over the spectra index
        pixel_name_df = data.frame(pixel_number = pixels(msidata),
                                   pixel_name = msidata\$annotation)
        last_pixel = aggregate(pixel_number~pixel_name, data = pixel_name_df, max)
        pixel_vector = last_pixel[,2]
        abline_vector = pixel_vector

        ## position_df is not needed any more, free the RAM
        rm(position_df)
        gc()
    }

    ################### 1) Pixel order image ###################################

    ## running number of every pixel in acquisition/file order
    pixelnumber = 1:pixelcount
    pixelxyarray = data.frame(x = coord(msidata)\$x,
                              y = coord(msidata)\$y,
                              pixelnumber = pixelnumber)
    gg_title = "Pixel order"

    ## tile image in which the colour gradient follows the pixel number
    print(ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber)) +
          geom_tile() +
          coord_fixed() +
          ggtitle(gg_title) +
          theme_bw() +
          theme(plot.title = element_text(hjust = 0.5),
                text = element_text(family="ArialMT", face="bold", size=12)) +
          scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"),
                               space = "Lab", na.value = "black", name = "Pixel\nnumber"))

    ## pixelxyarray is not needed any more, free the RAM
    rm(pixelxyarray)
    gc()

    ################ 2) Number of calibrants per spectrum ######################

    ## matrix with one row per calibrant and one column per pixel:
    ## TRUE if the pixel has any intensity > 0 inside the m/z window of the calibrant
    pixelmatrix = matrix(ncol=ncol(msidata), nrow = 0)

    ## plot only possible when there is at least one valid calibrant
    if (length(inputcalibrantmasses) != 0){

        ## calculate plusminus values in m/z for each calibrant (ppm tolerance times m/z)
        plusminusvalues = rep($plusminus_ppm/1000000, length(inputcalibrantmasses))*inputcalibrantmasses

        ## filter for m/z window of each calibrant and calculate if sum of peak intensities > 0

        for (mass in seq_along(inputcalibrantmasses)){

            filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]

            ## "&&" evaluates the intensity sum only if the window contains at least one m/z
            if (nrow(filtered_data) > 0 && sum(spectra(filtered_data),na.rm=TRUE) > 0){

                ## per pixel: is there any intensity > 0 in the window
                intensity_sum = colSums(spectra(filtered_data), na.rm=TRUE) > 0

            }else{

                ## no m/z or no intensity in the window: calibrant is absent in all pixels
                intensity_sum = rep(FALSE, ncol(filtered_data))}

            ## add one row for this calibrant
            pixelmatrix = rbind(pixelmatrix, intensity_sum)
        }

        ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE)
        countvector= as.factor(colSums(pixelmatrix, na.rm=TRUE))
        countdf= data.frame(coord(msidata)\$x, coord(msidata)\$y, countvector) ## add pixel coordinates to counts
        colnames(countdf) = c("x", "y", "countvector")

        ## one colour per observed count; the Set1 palette has 9 colours and is
        ## interpolated when more than 9 different counts occur
        count_levels = nlevels(countvector)
        mycolours = brewer.pal(9, "Set1")
        if (count_levels > length(mycolours)){
            mycolours = colorRampPalette(mycolours)(count_levels)}

        print(ggplot(countdf, aes(x=x, y=y, fill=countvector))+
          geom_tile() + coord_fixed() +
          ggtitle(paste0("Number of calibrants per pixel (±",$plusminus_ppm, " ppm)")) +
          theme_bw() +
          theme(plot.title = element_text(hjust = 0.5))+
          theme(text=element_text(family="ArialMT", face="bold", size=12))+
          scale_fill_manual(values = mycolours[1:count_levels],
                                na.value = "black", name = "# calibrants"))

        ## remove countdf to clean up RAM space
            rm(countdf)
            gc()

    }else{print("2) The inputcalibrant m/z were not provided or outside the m/z range")}

    ########################## 3) fold change image ###########################

    ## one group of pages (two control spectra and one fold change image) is produced
    ## for every fold change entry of the tool form
    #if $calibrantratio:
        #for $foldchanges in $calibrantratio:
            mass1 = $foldchanges.mass1
            mass2 = $foldchanges.mass2
            ## half width of the m/z window around each mass
            ## (the division by 1000000 implies that the distance is given in ppm)
            distance1 = $foldchanges.distance/1000000 * mass1
            distance2 = $foldchanges.distance/1000000 * mass2

            ### if user did not write a label use input m/z as label
            #if not str($foldchanges.filenameratioplot).strip():
                #set $label = "log2 fold change %s Da / %s Da" % ($foldchanges.mass1, $foldchanges.mass2)
            #else:
                #set $label = $foldchanges.filenameratioplot
            #end if

            ### filter msidata for given m/z range (for both input m/z)
            filtered_data1 = msidata[mz(msidata) >= mass1-distance1 & mz(msidata) <= mass1+distance1,]
            filtered_data2 = msidata[mz(msidata) >= mass2-distance2 & mz(msidata) <= mass2+distance2,]
            ### m/z range for each plot (fixed range of 5 Da)
            ### xlim does not work because it does not adjust for the max. intensities within the range
            ### (features() presumably returns the feature index that matches the given m/z - to be confirmed)
            mzdown1 = features(msidata, mz = mass1-2)
            mzup1 = features(msidata, mz = mass1+3)
            mzdown2 = features(msidata, mz = mass2-2)
            mzup2 = features(msidata, mz = mass2+3)

            ### plot for first m/z
            ### outer margin at the top leaves room for the common title of both spectra
            par(oma=c(0,0,2,0))
            print(plot(msidata[mzdown1:mzup1,], run="infile", layout=c(2,1), strip=FALSE, main=paste0("Average spectrum ", mass1, " Da")))
            ### vertical lines: the input m/z and the borders of its window
            abline(v=c(mass1-distance1, mass1, mass1+distance1), col="blue",lty=c(3,6,3))

            ### plot for second m/z
            print(plot(msidata[mzdown2:mzup2,], run="infile", layout=FALSE, strip=FALSE, main= paste0("Average spectrum ", mass2, " Da")))
            abline(v=c(mass2-distance2, mass2, mass2+distance2), col="blue", lty=c(3,6,3))
            title("Control of fold change plot", outer=TRUE)

            ### filter spectra for max m/z to have two vectors, which can be divided
            ### plot spatial distribution of fold change

            ## calculate mean intensity for each m/z over the ppm range; then calculate log2 foldchange
            mass1vector = pixelApply(filtered_data1, mean, na.rm =TRUE)
            mass2vector = pixelApply(filtered_data2, mean, na.rm = TRUE)
            ## one value per pixel: log2 of the ratio of the two mean intensities
            foldchange= log2(mass1vector/mass2vector)
            fcmatrix = data.frame(coord(msidata)\$x, coord(msidata)\$y,foldchange)
            colnames(fcmatrix) = c("x", "y", "foldchange")

            ## the title is the label that was set by the template above
            print(ggplot(fcmatrix, aes(x=x, y=y, fill=foldchange))+
             geom_tile() + coord_fixed()+
             ggtitle("$label")+
             theme_bw()+
             theme(plot.title = element_text(hjust = 0.5))+
             theme(text=element_text(family="ArialMT", face="bold", size=12))+
             scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
                                    ,space = "Lab", na.value = "black", name ="FC"))

            ## remove FC files to clean up RAM space
                rm(fcmatrix)
                rm(filtered_data1)
                rm(filtered_data2)
                gc()

        #end for
    #end if

    #################### 4) m/z heatmaps #######################################
    ## back to one plot per page with standard margins
    par(mfrow=c(1,1), mar=c(5.1, 4.1, 4.1, 2.1), mgp=c(3, 1, 0), las=0)

    if (length(inputcalibrants[,1]) != 0){

        ## one ion image per valid input m/z
        for (mass in seq_along(inputcalibrants[,1])){

            par(oma=c(0,0,0,1))## margin for image legend

            tryCatch(
                {
                    print(image(msidata,
                                mz = inputcalibrants[,1][mass],
                                plusminus = plusminusvalues[mass],
                                main = paste0(inputcalibrants[,2][mass], ": ", round(inputcalibrants[,1][mass], digits = 2)," (±",$plusminus_ppm, " ppm)"),
                                contrast.enhance = "histogram",
                                strip = FALSE,
                                ylim = c(maximumy,minimumy)))
                },
                error = function(cond) {
                    ## if there are not enough intensities in the mz range skip creating an image
                    print(paste0("Not enough intensities > 0 for m/z ", inputcalibrants[,1][mass]))
                }
            )
        }

    } else {
        print("4) The input peptide and calibrant m/z were not provided or outside the m/z range")
    }

    #################### 5) Number of peaks per pixel - image ##################

    ## every intensity value > 0 is counted as one peak of the spectrum
    peaksperpixel = colSums(spectra(msidata)> 0, na.rm=TRUE)
    peakscoordarray = data.frame(x = coord(msidata)\$x,
                                 y = coord(msidata)\$y,
                                 peaksperpixel = peaksperpixel)

    ## tile image coloured by the number of peaks
    print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel)) +
          geom_tile() +
          coord_fixed() +
          ggtitle("Number of peaks per spectrum") +
          theme_bw() +
          theme(plot.title = element_text(hjust = 0.5),
                text = element_text(family="ArialMT", face="bold", size=12)) +
          scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"),
                               space = "Lab", na.value = "black", name = "# peaks"))

    ## peakscoordarray is not needed any more, free the RAM
    rm(peakscoordarray)
    gc()


    ############################### 6) TIC image ###############################

    ## pixel coordinates together with the TIC of each spectrum.
    ## The intensity column is named "TICs" so that aes(fill=TICs) is resolved inside
    ## the data frame and not only through the global vector of the same name.
    TICcoordarray=data.frame(coord(msidata)\$x, coord(msidata)\$y, TICs)
    colnames(TICcoordarray) = c("x", "y", "TICs")

    print(ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs))+
     geom_tile() + coord_fixed() +
     ggtitle("Total Ion Current")+
     theme_bw() +
     theme(plot.title = element_text(hjust = 0.5))+
     theme(text=element_text(family="ArialMT", face="bold", size=12))+
     scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") 
                            ,space = "Lab", na.value = "black", name = "TIC"))

    ## remove TICcoordarray to clean up RAM space
        rm(TICcoordarray)
        gc()

    ############################### 6b) median int image ###############################

    ## median of all intensities of each spectrum
    median_int = pixelApply(msidata, median)

    median_coordarray = data.frame(x = coord(msidata)\$x,
                                   y = coord(msidata)\$y,
                                   median_int = median_int)

    ## tile image coloured by the median intensity
    print(ggplot(median_coordarray, aes(x=x, y=y, fill=median_int)) +
          geom_tile() +
          coord_fixed() +
          ggtitle("Median intensity per spectrum") +
          theme_bw() +
          theme(plot.title = element_text(hjust = 0.5),
                text = element_text(family="ArialMT", face="bold", size=12)) +
          scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"),
                               space = "Lab", na.value = "black", name = "median\nintensity"))

    ## median_coordarray is not needed any more, free the RAM
    rm(median_coordarray)
    gc()

    ############################### 6c) max int image ###############################

    ## highest intensity of each spectrum
    max_int = pixelApply(msidata, max)

    max_coordarray = data.frame(x = coord(msidata)\$x,
                                y = coord(msidata)\$y,
                                max_int = max_int)

    ## tile image coloured by the maximum intensity
    print(ggplot(max_coordarray, aes(x=x, y=y, fill=max_int)) +
          geom_tile() +
          coord_fixed() +
          ggtitle("Maximum intensity per spectrum") +
          theme_bw() +
          theme(plot.title = element_text(hjust = 0.5),
                text = element_text(family="ArialMT", face="bold", size=12)) +
          scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"),
                               space = "Lab", na.value = "black", name = "max\nintensity"))

    ## max_coordarray is not needed any more, free the RAM
    rm(max_coordarray)
    gc()

    ############################### 7) Most abundant m/z image #################

    ## index of the m/z feature with the highest intensity, one value per spectrum
    highestmz = pixelApply(msidata, which.max)

    ## which.max returns integer(0) for spectra without a maximum; in that case the
    ## result is a list, the empty entries are set to NA and the list is flattened
    if (is.list(highestmz)){
        zero_entry <- lengths(highestmz) == 0
        highestmz[zero_entry] <- NA
        highestmz = unlist(highestmz)
    }

    ## translate the feature index into the m/z value and add the pixel position
    highestmz_matrix = data.frame(x = coord(msidata)\$x,
                                  y = coord(msidata)\$y,
                                  highestmzinDa = mz(msidata)[highestmz])

    ## tile image coloured by the most abundant m/z
    print(ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa)) +
          geom_tile() +
          coord_fixed() +
          ggtitle("Most abundant m/z in each spectrum") +
          theme_bw() +
          theme(plot.title = element_text(hjust = 0.5)) +
          scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z",
                               limits = c(min(highestmz_matrix\$highestmzinDa), max(highestmz_matrix\$highestmzinDa))) +
          theme(text = element_text(family="ArialMT", face="bold", size=12)))

    ## highestmz_matrix is not needed any more, free the RAM
    rm(highestmz_matrix)
    gc()


    ########################## 8) optional pca image for two components #################

    ## this part is only written into the script when the PCA option is set
    #if $do_pca:

        ## fixed seed, so that repeated runs use the same random numbers
        set.seed(1)
        pca = PCA(msidata, ncomp=2)

        ## plot overview image and plot and PC1 and 2 images
        ## (layout=c(2,1) starts a page with two panels, layout=FALSE draws into the existing layout)
        print(plot(pca, col=c("black", "darkgrey"), main="PCA for two components", layout=c(2,1), strip=FALSE))
        print(image(pca, run="infile", col=c("black", "white"), strip=FALSE,  ylim= c(maximumy+0.2*maximumy,minimumy-1), layout=FALSE))

        par(oma=c(0,0,0,1))## margin for image legend
        ## separate images for the two components
        print(image(pca, column = "PC1" , strip=FALSE, superpose = FALSE, main="PC1", col.regions = risk.colors(100), layout=c(2,1), ylim= c(maximumy+0.2*maximumy,minimumy-1)))
        print(image(pca, column = "PC2" , strip=FALSE, superpose = FALSE, main="PC2", col.regions = risk.colors(100), layout=FALSE,  ylim= c(maximumy+0.2*maximumy,minimumy-1)))
    ## remove pca to clean up RAM space
        rm(pca)
        gc()

    #end if

    ################## III) properties over spectra index ######################
    ############################################################################
    print("properties over pixels")
    par(mfrow = c(2,1), mar=c(5,6,4,2))

    ########################## 9) number of peaks per spectrum #################
    ## 9a) scatterplot

    ## 9a) density scatterplot: number of peaks over the spectra index
    plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="Number of peaks per spectrum")
    title(xlab="Spectra index", line=3)
    title(ylab="Number of peaks", line=4)

    ## dashed lines at the last pixel of every annotation group
    if (!is.null(levels(msidata\$annotation))){
        abline(v=abline_vector, lty = 3)
    }

    ## 9b) histogram, blue line = median
    hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="")
    title(main="Number of peaks per spectrum", line=2)
    title(ylab="Frequency = # spectra", line=4)
    abline(v=median(peaksperpixel), col="blue")

    ## 9c) stacked histogram that shows the contribution of every annotation group
    if (!is.null(levels(msidata\$annotation))){

        df_9 = data.frame(Npeaks = peaksperpixel,
                          annotation = msidata\$annotation)

        hist_9 = ggplot(df_9, aes(x=Npeaks, fill=annotation)) +
            geom_histogram() +
            ## dashed line = median over all spectra
            geom_vline(xintercept = median(peaksperpixel), size = 1, colour = "black",linetype = "dashed") +
            theme_bw() +
            theme(text = element_text(family="ArialMT", face="bold", size=12),
                  plot.title = element_text(hjust = 0.5),
                  legend.key.size = unit(0.2, "line"),
                  legend.text = element_text(size = 8),
                  legend.position = "bottom",
                  legend.direction = "vertical") +
            labs(title="Number of peaks per spectrum and annotation group", x="Number of peaks per spectrum", y = "Frequency = # spectra") +
            guides(fill=guide_legend(ncol=5,byrow=TRUE))

        print(hist_9)
    }

    ########################## 10) TIC per spectrum ###########################

    ## 10a) density scatterplot: TIC over the spectra index
    par(mfrow = c(2,1), mar=c(5,6,4,2))

    ## the density colours do not work when all TICs are identical (e.g. after TIC
    ## normalization), in this case a normal scatterplot is drawn
    if (min(TICs) != max(TICs)){
        plot_colorByDensity(pixels(msidata), TICs,  ylab = "", xlab = "", main="TIC per spectrum")
    }else{
        plot(pixels(msidata), TICs, ylab = "", xlab = "", pch=20, main="TIC per spectrum", col="#FF3100")
    }

    title(xlab="Spectra index", line=3)
    title(ylab = "Total ion current intensity", line=4)

    ## dashed lines at the last pixel of every annotation group
    if (!is.null(levels(msidata\$annotation))){
        abline(v=abline_vector, lty = 3)
    }

    ## 10b) histogram, blue line = median of the non-empty spectra
    hist((TICs), main="", las=1, xlab = "TIC per spectrum", ylab="")
    title(main= "TIC per spectrum", line=2)
    title(ylab="Frequency = # spectra", line=4)
    abline(v=median(TICs[TICs>0]), col="blue")

    ## 10c) stacked histogram that shows the contribution of every annotation group
    if (!is.null(levels(msidata\$annotation))){

        df_10 = data.frame(TICs = (TICs),
                           annotation = msidata\$annotation)

        hist_10 = ggplot(df_10, aes(x=TICs, fill=annotation)) +
            geom_histogram() +
            ## dashed line = median of the non-empty spectra
            geom_vline(xintercept = median(TICs[TICs>0]), size = 1, colour = "black",linetype = "dashed") +
            theme_bw() +
            theme(text = element_text(family="ArialMT", face="bold", size=12),
                  plot.title = element_text(hjust = 0.5),
                  legend.position = "bottom",
                  legend.direction = "vertical",
                  legend.key.size = unit(0.2, "line"),
                  legend.text = element_text(size = 8)) +
            labs(title="TIC per spectrum and annotation group", x="TIC per spectrum", y = "Frequency = # spectra") +
            guides(fill=guide_legend(ncol=5,byrow=TRUE))

        print(hist_10)
    }

    ################################## IV) properties over m/z ####################
    ############################################################################
    print("properties over m/z")

    ########################## 11) Histogram of m/z values #####################

    ## one plot per page with standard margins for the m/z histogram
    par(mfrow = c(1, 1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
    hist(mz(msidata), xlab = "m/z", main="Histogram of m/z values")

    ########################## 12) Number of peaks per m/z #####################

    ## for each m/z: number of spectra with an intensity > 0
    peakspermz = rowSums(spectra(msidata) > 0, na.rm=TRUE)

    ## two plots per page; wider right margin for the second y-axis
    par(mfrow = c(2,1), mar=c(5,6,4,4.5))
    ## 12a) scatterplot
    plot_colorByDensity(mz(msidata),peakspermz, main= "Number of peaks per m/z", ylab ="")
    title(xlab="m/z", line=2.5)
    title(ylab = "Number of peaks", line=4)
    ## right axis: the same tick positions expressed as percentage of all spectra
    axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1)
    mtext("Coverage of spectra [%]", 4, line=3, adj=1)

    ## 12b) histogram
    hist(peakspermz, main="", las=1, ylab="", xlab="")
    title(ylab = "Frequency", line=4)
    title(main="Number of peaks per m/z", xlab = "Number of peaks per m/z", line=2)
    ## blue line = median
    abline(v=median(peakspermz), col="blue") 

    ########################## 13) Sum of intensities per m/z ##################

    ## Sum of all intensities for each m/z (like TIC, but for m/z instead of pixel)
    mzTIC = featureApply(msidata, sum, na.rm=TRUE) ## calculate intensity sum for each m/z

    par(mfrow = c(2,1), mar=c(5,6,4,2))
    ## 13a) scatterplot
    plot_colorByDensity(mz(msidata),mzTIC,  main= "Sum of intensities per m/z", ylab ="")
    title(xlab="m/z", line=2.5)
    title(ylab="Intensity sum", line=4)

    ## 13b) histogram
    hist(mzTIC, main="", xlab = "", las=1, ylab="")
    title(main="Sum of intensities per m/z", line=2, ylab="")
    title(xlab = "sum of intensities per m/z")
    title(ylab = "Frequency", line=4)
    ## blue line = median of the m/z with an intensity sum > 0
    abline(v=median(mzTIC[mzTIC>0]), col="blue")

    ################################## V) intensity plots ########################
    ############################################################################
    print("intensity plots")
    ########################## 14) Intensity distribution ######################

    par(mfrow = c(2,1), mar=c(5,6,4,2))

    ## 14a) Median intensity over spectra
    medianint_spectra = pixelApply(msidata, median)
    plot(medianint_spectra, main="Median intensity per spectrum",las=1, xlab="Spectra index", ylab="")
    title(ylab="Median spectrum intensity", line=4)
    ## dashed lines at the last pixel of every annotation group
    if (!is.null(levels(msidata\$annotation))){
        abline(v=abline_vector, lty = 3)}

    ## 14b) histogram of all intensities
    ## the intensity matrix is converted only once and used for the histogram and
    ## for the median, instead of converting the complete data three times
    intensity_matrix = as.matrix(spectra(msidata))
    hist(intensity_matrix, main="", xlab = "", ylab="", las=1)
    title(main="Intensity histogram", line=2)
    title(xlab="intensities")
    title(ylab="Frequency", line=4)
    ## blue line = median of all intensities > 0
    abline(v=median(intensity_matrix[intensity_matrix>0], na.rm=TRUE), col="blue")

    ## remove the intensity copy to clean up RAM space
    rm(intensity_matrix)
    gc()


    ## 14c) histogram to show contribution of annotation groups

    if (!is.null(levels(msidata\$annotation))){

        ## long table with one row per intensity value and its annotation group
        df_13 = data.frame(matrix(,ncol=2, nrow=0))
        for (subsample in levels(msidata\$annotation)){
            ## all intensities of the pixels that belong to this group (no transformation)
            int_subsample = spectra(msidata)[,msidata\$annotation==subsample]
            df_subsample = data.frame(as.numeric(int_subsample))
            df_subsample\$annotation = subsample
            df_13 = rbind(df_13, df_subsample)}
        df_13\$annotation = as.factor(df_13\$annotation)
        colnames(df_13) = c("int", "annotation")

        ## median of all intensities > 0; NA intensities are ignored as in the
        ## intensity histogram above, otherwise the median line would be missing
        median_positive_int = median(spectra(msidata)[(spectra(msidata)>0)], na.rm=TRUE)

        hist_13 = ggplot(df_13, aes(x=int, fill=annotation)) +
        geom_histogram()+ theme_bw()+
        theme(text=element_text(family="ArialMT", face="bold", size=12))+
        labs(title="Intensities per sample", x="intensities", y = "Frequency") +
        theme(plot.title = element_text(hjust = 0.5))+
        theme(legend.position="bottom",legend.direction="vertical")+
        theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 8))+
        guides(fill=guide_legend(ncol=5,byrow=TRUE))+
        geom_vline(xintercept = median_positive_int, size = 1, colour = "black",linetype = "dashed")
        print(hist_13)

        ## 14d) boxplots to visualize in a different way the intensity distributions
        ## large bottom margin, because the group names are written vertically below the axis
        par(mfrow = c(1,1), cex.axis=1.3, cex.lab=1.3, mar=c(13.1,4.1,5.1,2.1))

        ## matrix with one row per m/z; one column per annotation group is added in the loop
        mean_matrix = matrix(,ncol=0, nrow = nrow(msidata))
        for (subsample in levels(msidata\$annotation)){
            ## mean intensity of every m/z over all pixels of this group
            mean_mz_sample = rowMeans(spectra(msidata)[,msidata\$annotation==subsample],na.rm=TRUE)
            mean_matrix = cbind(mean_matrix, mean_mz_sample)}

        ## x-axis is suppressed here and drawn separately with the group names
        boxplot(log10(mean_matrix), ylab = "Log10 mean intensity per m/z", main="Log10 mean m/z intensities per annotation group", xaxt = "n")
        (axis(1, at = c(1:number_combined), labels=levels(msidata\$annotation), las=2))

        ## 14e) Heatmap of mean intensities of annotation groups

        colnames(mean_matrix) = levels(msidata\$annotation)
        ## groups without any intensity for an m/z get 0 instead of NA
        mean_matrix[is.na(mean_matrix)] = 0

        ## heatmap() needs at least two rows and two columns, otherwise it stops with an error;
        ## margins and title are arguments of heatmap() and not of print()
        if (nrow(mean_matrix) >= 2 && ncol(mean_matrix) >= 2){
            par(oma=c(3,0,0,0))
            heatmap(mean_matrix, margins = c(10, 10),
                    main = "Heatmap of mean intensities per annotation group")
        }else{
            print("14e) Heatmap needs at least two annotation groups and two m/z features")}


        ## 14f) PCA of mean intensities of annotation groups

        ## define annotation by colour (one rainbow colour per annotation group)
        annotation_colour = rainbow(length(levels(msidata\$annotation)))[as.factor(levels(msidata\$annotation))]
        ## groups become the observations (rows); no centering and no scaling
        pca = prcomp(t(mean_matrix),center=FALSE,scale.=FALSE)

        ## both plots need at least two components, which is not the case for a
        ## single annotation group (or a single m/z feature)
        if (ncol(pca\$x) >= 2){
            ## plot single plot (PC1 against PC2)
            plot(pca\$x[,c(1,2)],col=annotation_colour,pch=19)
            ## plot pca with colours for max first 5 PCs
            pc_comp = min(ncol(pca\$x), 5)
            pairs(pca\$x[,1:pc_comp],col=annotation_colour,pch=19)
            legend("bottom", horiz = TRUE, legend=levels(msidata\$annotation), col=rainbow(length(levels(msidata\$annotation))), pch=19)
        }else{
            print("14f) PCA plots need at least two annotation groups and two m/z features")}

    }

    ################################## VI) Mass spectra and m/z accuracy ########################
    ############################################################################
    print("Mass spectra and m/z accuracy")

    ############################ 15) Mass spectra ##############################

    ## working copy used for all spectra plots of sections 15 and 16
    ## (no NA replacement is performed here, the data is used as it is)
    msidata_no_NA = msidata

    ## find three equal m/z ranges for the average mass spectra plots: 
    third_mz_range = round(nrow(msidata_no_NA)/3,0)

    ## full average spectrum followed by three zooms, each covering one third of the m/z features
    par(cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
    print(plot(msidata_no_NA, run="infile", layout=c(2,2), strip=FALSE, main= "Average spectrum"))
    print(plot(msidata_no_NA[1:third_mz_range,], layout=FALSE, run="infile", strip=FALSE, main="Zoomed average spectrum"))
    print(plot(msidata_no_NA[third_mz_range:(2*third_mz_range),], layout=FALSE, run="infile", strip=FALSE, main="Zoomed average spectrum"))
    print(plot(msidata_no_NA[(2*third_mz_range):nrow(msidata_no_NA),], layout=FALSE, run="infile", strip=FALSE, main="Zoomed average spectrum"))

    ## plot one average mass spectrum for each pixel annotation group

    if (!is.null(levels(msidata\$annotation))){
        ## print legend only for less than 10 samples
        if (length(levels(msidata\$annotation)) < 10){
            key_legend = TRUE
        }else{key_legend = FALSE}
        par(mfrow = c(1,1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
        print(plot(msidata, run="infile", pixel.groups=msidata\$annotation, key=key_legend, col=hue_pal()(length(levels(msidata\$annotation))),superpose=TRUE, main="Average mass spectra for annotation groups"))
    }

    ## plot 4 random mass spectra
    ## find up to four random, not empty pixel to plot their spectra in the following plots:
    ## the indices are drawn via sample.int because sample() on a single number n would draw from 1:n,
    ## and the sample size is capped so that files with less than four non-empty pixels do not fail
    nonempty_pixels = which(TICs != 0)
    pixel_vector = nonempty_pixels[sample.int(length(nonempty_pixels), min(4, length(nonempty_pixels)))]

    par(mfrow = c(2, 2), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
    if (length(pixel_vector) > 0){
        print(plot(msidata_no_NA, pixel = pixel_vector))}

    ################### 16) Zoomed in mass spectra for calibrants ##############

    ## ppm difference per calibrant: m/z with the highest mean intensity in the ppm window vs. theoretical m/z (plot 17)
    differencevector = numeric()
    ## ppm difference per calibrant: closest measured m/z vs. theoretical m/z (plot 18)
    differencevector2 = vector()

    if (length(inputcalibrantmasses) != 0){

    ### calculate plusminus values in m/z for each calibrant, this is used for all following plots
    plusminusvalues = rep($plusminus_ppm/1000000, length(inputcalibrantmasses)) * inputcalibrantmasses

        for (mass in seq_along(inputcalibrantmasses)){

            ## theoretical m/z of this calibrant and the three vertical lines drawn in every zoomed plot:
            ## lower window border, theoretical m/z, upper window border
            calibrant_mz = inputcalibrantmasses[mass]
            window_lines = c(calibrant_mz - plusminusvalues[mass], calibrant_mz, calibrant_mz + plusminusvalues[mass])

            ### define the plot window with xmin und xmax
            minmasspixel1 = features(msidata_no_NA, mz=calibrant_mz-0.5)
            maxmasspixel1 = features(msidata_no_NA, mz=calibrant_mz+1.5)
            minmasspixel2 = features(msidata_no_NA, mz=calibrant_mz-0.25)
            maxmasspixel2 = features(msidata_no_NA, mz=calibrant_mz+0.5)
            minmasspixel3 = features(msidata_no_NA, mz=calibrant_mz-1.5)
            maxmasspixel3 = features(msidata_no_NA, mz=calibrant_mz+3)

            ### find m/z with the highest mean intensity in m/z range (red line in plot 16) and calculate ppm difference for plot 17
            filtered_data = msidata_no_NA[mz(msidata_no_NA) >= window_lines[1] & mz(msidata_no_NA) <= window_lines[3],]

            ## scalar condition: use the short-circuit operator and ignore NA intensities so that the test can never evaluate to NA
            if (nrow(filtered_data) > 0 && sum(spectra(filtered_data), na.rm=TRUE) > 0){
                maxmassrow = featureApply(filtered_data, mean) ## for each m/z average intensity is calculated
                maxvalue = mz(filtered_data)[which.max(maxmassrow)] ### m/z with highest average intensity in m/z range
                mzdifference = maxvalue - calibrant_mz ### difference: most abundant m/z - theoretical value
                ppmdifference = mzdifference/calibrant_mz*1000000 ### calculate ppm for accuracy measurement
            }else{
                ppmdifference = NA
                maxvalue = NA}
            differencevector[mass] = round(ppmdifference, digits=2)

            ### find m/z closest to inputcalibrant and calculate ppm difference for plot 18
            mznumber = features(msidata_no_NA, mz = calibrant_mz) ### gives featurenumber which is closest to given m/z
            mzvalue = mz(msidata_no_NA)[mznumber] ### gives closest m/z
            mzdifference2 = mzvalue - calibrant_mz
            ppmdifference2 = mzdifference2/calibrant_mz*1000000
            differencevector2[mass] = round(ppmdifference2, digits=2)

            ## plotting of 4 spectra in one page
            par(oma=c(0,0,2,0))
            ## average plot

            print(plot(msidata_no_NA[minmasspixel1:maxmasspixel1,], run="infile", layout=c(2,2), strip=FALSE, main= "Average spectrum"))
            abline(v=window_lines, col="blue", lty=c(3,5,3))
            abline(v=c(maxvalue), col="red", lty=2)
            abline(v=c(mzvalue), col="green2", lty=4)
            ## average plot including points per data point
            print(plot(msidata_no_NA[minmasspixel1:maxmasspixel1,], run="infile", layout=FALSE, strip=FALSE, main="Average spectrum with data points"))
            points(mz(msidata_no_NA[minmasspixel1:maxmasspixel1,]), rowMeans(spectra(msidata_no_NA)[minmasspixel1:maxmasspixel1,,drop=FALSE]), col="blue", pch=20)
            ## plot of third average plot
            print(plot(msidata_no_NA[minmasspixel2:maxmasspixel2,], run="infile", layout=FALSE, strip=FALSE, main= "Average spectrum"))
            abline(v=window_lines, col="blue", lty=c(3,5,3))
            abline(v=c(maxvalue), col="red", lty=2)
            abline(v=c(mzvalue), col="green2", lty=4)
            ## plot of fourth average plot
            print(plot(msidata_no_NA[minmasspixel3:maxmasspixel3,], run="infile", layout=FALSE, strip=FALSE, main= "Average spectrum"))
            abline(v=window_lines, col="blue", lty=c(3,5,3))
            abline(v=c(maxvalue), col="red", lty=2)
            abline(v=c(mzvalue), col="green2", lty=4)
            ## page header in the outer margin, coloured like the corresponding vertical lines
            title(paste0("theor. m/z: ", round(inputcalibrants[mass,1], digits=4)), col.main="blue", outer=TRUE, line=0, adj=0.074)
            title(paste0("most abundant m/z: ", round(maxvalue, digits=4)), col.main="red", outer=TRUE, line=0, adj=0.49)
            title(paste0("closest m/z: ", round(mzvalue, digits=4)), col.main="green2", outer=TRUE, line=0, adj=0.93)

            ### 16b) one large extra plot with different colours for different pixel annotation groups

            if (!is.null(levels(msidata\$annotation))){
                ## print legend only for less than 10 annotation groups
                if (number_combined < 10){
                    key_zoomed = TRUE
                }else{key_zoomed = FALSE}
                par(mfrow = c(1, 1))
                print(plot(msidata_no_NA[minmasspixel1:maxmasspixel1,], run="infile", strip=FALSE,main="Average spectrum per annotation group",
                pixel.groups=msidata\$annotation, key=key_zoomed, col=hue_pal()(number_combined),superpose=TRUE))
                abline(v=window_lines, col="black", lty=c(3,1,3))
            }
        }

    ## remove msidata_no_NA to clean up RAM space
        rm(msidata_no_NA)
        gc()

    ######### 17) ppm difference input calibrant m/z and m/z with max intensity in given m/z range#########

        par(mfrow = c(1,1))
        ### plot the ppm difference calculated above: theor. m/z value to highest m/z value: 

        calibrant_names = as.character(inputcalibrants[,2])
        diff_df = data.frame(differencevector, calibrant_names)

        ## no calibrant had a peak in its window: draw a placeholder page instead of an empty bar plot
        if (all(is.na(diff_df[,1]))){
                plot(0,type='n',axes=FALSE,ann=FALSE)
                title(main=paste("plot 17: no peaks in the chosen region, repeat with higher ppm range"))
        }else{

        diff_plot1=ggplot(data=diff_df, aes(x=calibrant_names, y=differencevector)) + geom_bar(stat="identity", fill = "darkgray") + theme_minimal() +
        labs(title="Average m/z error (max. average intensity vs. theor. calibrant m/z)", x="calibrants", y = "Average m/z error in ppm")+
        theme(plot.title = element_text(hjust = 0.5, size=14))+theme(text=element_text(family="ArialMT", face="bold", size=14))+
        geom_text(aes(label=differencevector), vjust=-0.3, size=5.5, col="blue") +
        theme(axis.text.x = element_text(angle = 90, hjust = 1, size=14))

        print(diff_plot1)
        }

    ######### 18) ppm difference input calibrant m/z and closest m/z ###########

        ### plot the ppm difference calculated above theor. m/z value to closest m/z value: 
        ## differencevector2 was already rounded to two digits when it was filled in the loop above

        calibrant_names = as.character(inputcalibrants[,2])
        diff_df = data.frame(differencevector2, calibrant_names)

        diff_plot2=ggplot(data=diff_df, aes(x=calibrant_names, y=differencevector2)) + geom_bar(stat="identity", fill = "darkgray") + theme_minimal() +
        labs(title="Average m/z error (closest measured m/z vs. theor. calibrant m/z)", x="calibrants", y = "Average m/z error in ppm")+
        theme(plot.title = element_text(hjust = 0.5, size=14))+theme(text=element_text(family="ArialMT", face="bold", size=14))+
        geom_text(aes(label=differencevector2), vjust=-0.3, size=5.5, col="blue")+
        theme(axis.text.x = element_text(angle = 90, hjust = 1, size=14))

        print(diff_plot2)

        #################### 19) ppm difference over pixels #####################

        par(mfrow = c(1,1))
        ## ppm_df: one row per spectrum, one column per calibrant
        ppm_df = as.data.frame(matrix(,ncol=0, nrow = ncol(msidata)))
        for (cal_index in seq_along(inputcalibrantmasses)){
            calibrant = inputcalibrantmasses[cal_index]
            ### restrict to the calibrant m/z window, if no m/z in the range, ppm differences for this calibrant will be NA
            filtered_data = msidata[mz(msidata) >= calibrant-plusminusvalues[cal_index] & mz(msidata) <= calibrant+plusminusvalues[cal_index],]

            if (nrow(filtered_data) > 0){
                ### filtered for m/z range, find max peak in each spectrum
                ppm_vector = numeric()
                for (pixel_count in 1:ncol(filtered_data)){
                    ## read the intensities of this spectrum only once
                    pixel_intensities = spectra(filtered_data)[,pixel_count]
                    max_intensity = max(pixel_intensities)

                    ### if maximum intensity in m/z range was 0 or NA set ppm diff to NA (not shown in plot)
                    if (is.na(max_intensity) || max_intensity == 0){
                        ppmdiff = NA
                    }else{
                        ## for each spectrum (pixel_count) find the m/z that has the highest intensity
                        mz_max = mz(filtered_data)[which.max(pixel_intensities)]
                        mzdiff = mz_max - calibrant
                        ppmdiff = mzdiff/calibrant*1000000}
                    ppm_vector[pixel_count] = ppmdiff}

            }else{
                ppm_vector = rep(NA, ncol(msidata))
            }

            ppm_df = cbind(ppm_df, ppm_vector)
        }

        if (all(is.na(ppm_df))){
            plot(0,type='n',axes=FALSE,ann=FALSE)
            title(main=paste("plot 19: no peaks in the chosen region, repeat with higher ppm range"))
        }else{

            ### plot ppm differences over pixels (spectra index)
            par(mar=c(4.1, 4.1, 4.1, 8.5))

            plot(0,0,type="n", ylim=c(min(ppm_df, na.rm=TRUE),max(ppm_df, na.rm=TRUE)), xlim = c(1,ncol(msidata)),xlab = "Spectra index", ylab = "m/z difference in ppm", main="Difference m/z with max. average intensity vs. theor. m/z\n(per spectrum)") 
            for (each_cal in 1:ncol(ppm_df)){
                lines(ppm_df[,each_cal], col=mycolours[each_cal], type="p")}
            ## the data is drawn as points, therefore the legend shows point symbols as well
            legend("topright", inset=c(-0.2,0), xpd = TRUE, bty="n", cex=0.8,legend=inputcalibrantmasses, col=mycolours[1:ncol(ppm_df)],pch=1)
             if (!is.null(levels(msidata\$annotation))){
                abline(v=abline_vector, lty = 3)}}

            ### make x-y-images for mz accuracy
            ## these images are drawn in any case, also when all ppm differences are NA (NA tiles are black)

            ppm_dataframe = data.frame(coord(msidata)\$x, coord(msidata)\$y, ppm_df)
            ## only the two coordinate columns are named here; the calibrant columns are selected by position below
            colnames(ppm_dataframe)[1:2] = c("x", "y")

            for (each_cal in 1:ncol(ppm_df)){
                tmp_ppm = ppm_dataframe[,c(1,2,each_cal+2)]
                tmp_ppm[,3] = as.numeric(tmp_ppm[,3])
                colnames(tmp_ppm) = c("x","y", "ppm_each_cal")

                print(ggplot(tmp_ppm, aes(x=x, y=y, fill=ppm_each_cal))+
                 geom_tile() + coord_fixed() +
                 ggtitle(paste0("m/z accuracy for ",inputcalibrants[,2][each_cal]))+
                 theme_bw() +
                 theme(plot.title = element_text(hjust = 0.5))+
                 theme(text=element_text(family="ArialMT", face="bold", size=12))+
                 scale_fill_gradient2(low = "navy", mid = "grey", high = "red", midpoint = 0 ,space = "Lab", na.value = "black", name = "ppm\nerror"))}


    }else{print("plot 16+17+18+19) The inputcalibrant m/z were not provided or outside the m/z range")}
}else{
    print("inputfile has no intensities > 0")
}
    dev.off()


    ]]></configfile>
    </configfiles>
    <inputs>
        <!-- MSI input file selection; the macro is defined in macros.xml (not shown here) -->
        <expand macro="reading_msidata"/>
        <!-- Optional pixel annotation: the R script only reads an annotation file
             when load_annotation is 'yes_annotation' -->
        <conditional name="tabular_annotation">
            <param name="load_annotation" type="select" label="Use pixel annotation from tabular file for QC plots">
                <option value="no_annotation" selected="True">pixels belong into one group only</option>
                <option value="yes_annotation">use pixel annotation from a tabular file</option>
            </param>
                <when value="yes_annotation">
                    <expand macro="reading_pixel_annotations"/>
                </when>
                <when value="no_annotation"/>
        </conditional>
        <expand macro="pdf_filename"/>
        <!-- Optional tabular file with calibrant m/z values and names; macro defined in macros.xml -->
        <expand macro="reading_2_column_mz_tabular" optional="true"/>
        <!-- Inserted into the R script, where it is divided by 1000000 and multiplied with each
             calibrant m/z to obtain the half-width of the calibrant m/z window -->
        <param name="plusminus_ppm" value="200" type="float" label="ppm range" help="Will be added in both directions to input calibrant m/z"/>
        <!-- NOTE(review): presumably switches the PCA section of the report on or off;
             its use is in a part of the script not shown next to this block - confirm -->
        <param name="do_pca" type="boolean" label="PCA with 2 components"/>
        <!-- Between 0 and 10 fold change plots, each defined by two m/z values and a ppm range -->
        <repeat name="calibrantratio" title="Plot fold change of two m/z" min="0" max="10">
            <param name="mass1" value="1111" type="float" label="M/z 1" help="First m/z"/>
            <param name="mass2" value="2222" type="float" label="M/z 2" help="Second m/z"/>
            <param name="distance" value="200" type="float" label="ppm range" help="Will be added in both directions to input calibrant m/z and intensities will be averaged in this range."/>
            <!-- Title is restricted to ASCII letters, digits, underscore and space;
                 every other character is replaced by an empty string -->
            <param name="filenameratioplot" type="text" optional="true" label="Title" help="Optional title for fold change plot.">
                <sanitizer invalid_char="">
                    <valid initial="string.ascii_letters,string.digits">
                        <add value="_" />
                        <add value=" " />
                    </valid>
                </sanitizer>
            </param>
        </repeat>
    </inputs>
    <outputs>
        <!-- Single PDF report, collected from qualitycontrol.pdf in the job working directory -->
        <data format="pdf" name="QC_report" from_work_dir="qualitycontrol.pdf" label = "${tool.name} on ${on_string}: results"/>
    </outputs>
    <tests>
        <!-- Test 1: processed imzML (composite imzML + ibd) with calibrant file, 100 ppm window,
             PCA and one fold change plot; the PDF is compared with sim_size -->
        <test>
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Processed.imzML"/>
                <composite_data value="Example_Processed.ibd"/>
            </param>
            <conditional name="processed_cond">
                <param name="processed_file" value="processed"/>
                <param name="accuracy" value="400"/>
                <param name="units" value="ppm"/>
            </conditional>
            <conditional name="tabular_annotation">
                <param name="load_annotation" value="no_annotation"/>
            </conditional>
            <param name="calibrant_file" value="inputcalibrantfile1.tabular" ftype="tabular"/>
            <param name="mz_column" value="1"/>
            <param name="name_column" value="1"/>
            <param name="plusminus_ppm" value="100"/>
            <param name="filename" value="Testfile_imzml"/>
            <param name="do_pca" value="True"/>
            <repeat name="calibrantratio">
                <param name="mass1" value="328.9"/>
                <param name="mass2" value="398.8"/>
                <param name="distance" value="500"/>
                <param name="filenameratioplot" value = "Ratio of mz 328.9 and mz 398.8"/>
            </repeat>
            <output name="QC_report" file="QC_imzml.pdf" compare="sim_size"/>
        </test>

        <!-- Test 2: Analyze 7.5 input (parameters come from the infile_analyze75 macro),
             no annotation, no calibrants, PCA enabled -->
        <test>
            <expand macro="infile_analyze75"/>
            <conditional name="tabular_annotation">
                <param name="load_annotation" value="no_annotation"/>
            </conditional>
            <param name="filename" value="Testfile_analyze75"/>
            <param name="do_pca" value="True"/>
            <output name="QC_report" file="QC_analyze75.pdf" compare="sim_size"/>
        </test>

        <!-- Test 3: RData input with pixel annotations from a tabular file (with header)
             and calibrant file, PCA enabled -->
        <test>
            <param name="infile" value="3_files_combined.RData" ftype="rdata"/>
            <conditional name="tabular_annotation">
                <param name="load_annotation" value="yes_annotation"/>
                <param name="annotation_file" value="annotations_rdata.tabular"/>
                <param name="column_x" value="1"/>
                <param name="column_y" value="2"/>
                <param name="column_names" value="3"/>
                <param name="tabular_header" value="True"/>
            </conditional>
            <param name="calibrant_file" value="inputcalibrantfile1.tabular" ftype="tabular"/>
            <param name="mz_column" value="1"/>
            <param name="name_column" value="1"/>
            <param name="plusminus_ppm" value="100"/>
            <param name="filename" value="Testfile_rdata"/>
            <param name="do_pca" value="True"/>
            <output name="QC_report" file="QC_rdata.pdf" compare="sim_size"/>
        </test>
        <!-- Test 4: RData input named empty_spectra.rdata with a second calibrant file, PCA disabled -->
        <test>
            <param name="infile" value="empty_spectra.rdata" ftype="rdata"/>
            <conditional name="tabular_annotation">
                <param name="load_annotation" value="no_annotation"/>
            </conditional>
            <param name="calibrant_file" value="inputcalibrantfile2.txt"/>
            <param name="mz_column" value="1"/>
            <param name="name_column" value="2"/>
            <param name="filename" value="Testfile_rdata"/>
            <param name="do_pca" value="False"/>
            <output name="QC_report" file="QC_empty_spectra.pdf" compare="sim_size"/>
        </test>
    </tests>
    <help>
        <![CDATA[
@CARDINAL_DESCRIPTION@

-----

This tool uses Cardinal to read files and create a quality control report with descriptive plots for mass spectrometry imaging data. 

@MSIDATA_INPUT_DESCRIPTION@
- Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates, which will be automatically removed before the tool's analysis starts. 
@SPECTRA_TABULAR_INPUT_DESCRIPTION@
            - at least two different annotations should be in the annotation column

@MZ_2COLS_TABULAR_INPUT_DESCRIPTION@
            - maximum of 9 m/z values per run are supported
            - names should be unique

**Options**

- m/z of interest (e.g. internal calibrants) and the ppm range are used for m/z heatmaps (x-y grid), heatmap of number of calibrants per spectrum (x-y grid), zoomed in mass spectra, m/z accuracy plots
- Optional fold change plot: draws a heatmap (x-y grid) for the fold change of two m/z (log2(intensity ratio))
- All plots are described in more detail below

**Tip** 

- For additional m/z heatmaps use the MSI mz images tool and to plot more mass spectra use the MSI mass spectra tool. 
- To obtain the underlying spectra and feature values used in this quality report, the imzML exporter tool can be used.


**Output**

- quality control report as pdf with key numbers and descriptive plots describing the mass spectrometry imaging data


----------------------------------------------------------------------------------------------------------------------------------------------------

**Overview of the QC report plots**

- (annot): these plots will only be drawn if pixel annotations are loaded via a tabular file
- (cal): these plots will only be drawn if a tabular file with at least one valid calibrant m/z is provided
- (FC): these plots will only be drawn if the optional fold change image is selected
- Vertical lines in histograms represent median values. In density scatter plots the colour changes from blue to green, yellow and red the more points are overlaid.

- Overview of file properties: Numbers and ranges for m/z features and pixels are given. Median and range across all intensity values are provided. Intensities > 0 gives the percentage of m/z-pixel pairs with an intensity above zero. The number of empty spectra (TIC = 0), the median number of peaks (intensities > 0) per spectrum as well as the median TIC (total ion current) are given. The processing status of the file is provided as well as the number of valid calibrants from the provided tabular file. 

**x-y images (pixel/spectra information)**

- (annot) Spatial orientation of annotated pixels: All pixels of one annotation group have the same colour. 
- Pixel order: Shows the order of the pixels in the provided file. Depending on the instrument this can represent the acquisition order. If annotation file is provided pixels are ordered according to annotation groups. 
- (cal) Number of calibrants per pixel: In every spectrum the calibrant m/z window (calibrant m/z plusminus 'ppm range') is searched for peaks (intensity > 0). Calibrants are considered present in a spectrum when they have at least one peak in their m/z window. 
- (FC) Control of fold change plot: For both input m/z a zoomed in average spectrum is drawn with the input m/z as blue dashed line, the m/z range as blue dotted lines and the maximum intensity in the m/z window with a red line. 
- (FC) Fold change image: For each input m/z the average intensity within the given ppm range is calculated, then the log2 fold change of both average intensities is taken and plotted as heatmap. 
- (cal) Intensity heatmaps for the m/z value that is closest to the calibrant m/z (can be outside the ppm range). The intensities are averaged within the calibrant m/z window (ppm range). 
- Number of peaks per spectrum: For each spectrum the number of m/z values with intensity > 0 is calculated and plotted as heatmap.
- Total ion current: For each spectrum all intensities are summed up to obtain the TIC which is plotted as heatmap. 
- Median intensity: For each spectrum the median intensity is plotted as heatmap. 
- Maximum intensity: For each spectrum the maximum intensity is plotted as heatmap. 
- Most abundant m/z in each spectrum: For each spectrum the m/z value with the highest intensity is plotted. 
- PCA for two components: Result of a principal component analysis (PCA) for two components is given. The loading plot depicts the contribution of each m/z value and the x-y image represents the differences between the pixels, principal components 1 and 2 are also plotted as x-y image.

**Properties over spectra/pixels**

- Number of peaks per spectrum: Scatter plot and histogram showing the number of intensities > 0 for each spectrum. If annotation tabular file is provided, the pixels are sorted according to annotation groups and the dotted lines in the scatter plot separate spectra of different annotation groups.
- (annot) Number of peaks per spectrum and annotation group: Same histogram as in plot before but with colours to show the contribution of each pixel annotation group.
- TIC per spectrum: Scatter plot and histogram showing the sum of all intensities per spectrum (TIC). Dotted lines in the scatter plot separate spectra of different annotation groups.
- (annot) TIC per spectrum and annotation group: Same histogram as in plot before but with colours to show the contribution of each pixel annotation group. Only the length of the coloured bar is important and not its height from zero, as bars are added up and not overlaid. 

**Properties over m/z features**

- Histogram of m/z values: Histogram of all m/z values (complete m/z axis)
- Number of peaks per m/z: Scatter plot and histogram giving the number of intensities > 0 for each m/z. 
- Sum of intensities per m/z: Scatter plot and histogram of the sum of all intensities per m/z. 

**Intensity plots**

- Median intensity per spectrum: Scatter plot in which each point represents the median intensity for one spectrum. Dotted lines in the scatter plot separate spectra of different annotation groups.
- Histogram of intensities. 
- (annot) Intensities per annotation group: Same histogram as before but with colours to show the contribution of each pixel annotation group. 
- (annot) Log10 mean intensities per m/z and annotation group: For all pixels of an annotation group the log10 mean intensity for each m/z is calculated and shown as boxplot. 
- (annot) Heatmap of mean intensity per m/z
- (annot) PCA of mean intensity per m/z

**Mass spectra and m/z accuracy**

- Average mass spectra: First plot shows the average spectrum over the full m/z range, the other three plots zoom into the m/z axis.
- (annot) Average mass spectrum per annotation group.
- Random mass spectra: The mass spectra for four random pixels are plotted.
- (cal) For each calibrant four zoomed average mass spectra are drawn with different zoom levels. The theoretical calibrant m/z (taken from the input file) is represented by the dashed blue line. The dotted blue lines show the given ppm range. The green line is the m/z value that is closest to the theoretical calibrant and the red line is the m/z with the highest average intensity in the m/z window. In the second spectrum each blue dot indicates one data point.
- (annot) Average spectrum per annotation group: For each calibrant a zoomed in mass spectrum is plotted this time with the average intensities for each annotation group separately. 
- (cal) Difference m/z with max. average intensity vs. theor. calibrant m/z: The difference in ppm between the m/z with the highest average intensity and the theoretical m/z are plotted for each calibrant. This corresponds to the difference between the dashed blue line and the red line in the zoomed in mass spectra. 
- (cal) Difference closest measured m/z vs. theor. calibrant m/z: The difference in ppm between the closest m/z value and the theoretical m/z values are plotted for each calibrant. This corresponds to the difference between the dashed blue line and the green line in the zoomed in mass spectra. 
- (cal) Difference m/z with max. average intensity vs. theor. m/z (per spectrum): For each spectrum the ppm difference between the m/z with the highest intensity in the calibrant m/z window of this spectrum and the theoretical m/z is plotted. The calibrants have different plotting colours. Dotted lines separate spectra of different annotation groups. 
- (cal) Same m/z accuracy in ppm is plotted per calibrant and per spectrum as image in x-y dimension.


        ]]>
    </help>
    <expand macro="citations"/>
</tool>