diff combine.xml @ 5:b41107d8fe89 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 1df4591de435d232862f20669aea529ceb23b12a"
author galaxyp
date Fri, 13 Dec 2019 13:54:23 -0500
parents 48c07268f341
children bb1ac6b47a6c
line wrap: on
line diff
--- a/combine.xml	Fri Mar 22 08:18:29 2019 -0400
+++ b/combine.xml	Fri Dec 13 13:54:23 2019 -0500
@@ -1,13 +1,17 @@
-<tool id="cardinal_combine" name="MSI combine" version="@VERSION@.3">
+<tool id="cardinal_combine" name="MSI combine" version="2.2.6.0">
     <description>
         combine several mass spectrometry imaging datasets into one
     </description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="requirements">
-        <requirement type="package" version="3.0">r-ggplot2</requirement>
-    </expand>
+    <requirements>
+        <requirement type="package" version="2.2.6">bioconductor-cardinal</requirement>
+        <requirement type="package" version="3.6.1">r-base</requirement>
+        <requirement type="package" version="3.2.1">r-ggplot2</requirement>
+        <requirement type="package" version="0.12">r-maldiquantforeign</requirement>
+        <requirement type="package" version="1.19.3">r-maldiquant</requirement>
+    </requirements>
     <command detect_errors="exit_code">
     <![CDATA[
         #for $i, $infile in enumerate($infiles):
@@ -30,11 +34,9 @@
 
         cat '${msi_combine}' &&
         Rscript '${msi_combine}' &&
-        #if str($imzml_output) == "imzml_format":
-            mkdir $outfile_imzml.files_path &&
-            mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
-            mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true &&
-        #end if
+        mkdir $outfile_imzml.files_path &&
+        mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
+        mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true &&
         echo "imzML file:" > $outfile_imzml &&
         ls -l "$outfile_imzml.files_path" >> $outfile_imzml
 
@@ -42,13 +44,14 @@
     </command>
     <configfiles>
         <configfile name="msi_combine"><![CDATA[
-
 #import re
 
-################ load libraries and some preparations #################
+################ 1) load libraries and do preparations #################
 
 library(Cardinal)
 library(ggplot2)
+library(MALDIquantForeign)
+library(MALDIquant)
 
 ## read tabular file for xy_shift option
 #if str( $combine_conditional.combine_method ) == 'xy_shifts':
@@ -63,18 +66,19 @@
 }
 
 ## preparations for reading files one by one with for loop
-pixel_vector = numeric()
+sample_names = numeric()
 x_shifts = 0
 y_shifts = 0
 max_y = numeric()
 valid_dataset = logical()
+coordinates_combined = data.frame(matrix(,ncol=2, nrow=0))
+msidata_combined = list()
 #set $msidata = []
 #set $pixelcoords = []
 #set $num_infiles = len($infiles)
 all_files = $num_infiles
 
-
-############## reading files and changing pixel coordinates ###################
+###################### 2) reading MSI files ####################################
 
 #for $i, $infile in enumerate($infiles):
 
@@ -82,197 +86,226 @@
 
     #if $infile.ext == 'imzml'
         #if str($processed_cond.processed_file) == "processed":
-            msidata_$i <- readImzML('infile_${i}', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units", attach.only=TRUE)
+            msidata_$i <- readImzML('infile_${i}', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units", attach.only=TRUE, as = c("MSImageSet"))
             centroided(msidata_$i) = $centroids
         #else
-            msidata_$i <- readImzML('infile_${i}', attach.only=TRUE)
+            msidata_$i <- readImzML('infile_${i}', attach.only=TRUE, as = c("MSImageSet"))
             centroided(msidata_$i) = $centroids
         #end if
     #elif $infile.ext == 'analyze75'
-            msidata_$i <- readAnalyze('infile_${i}', attach.only=TRUE)
+            msidata_$i <- readAnalyze('infile_${i}', attach.only=TRUE, as = c("MSImageSet"))
             centroided(msidata_$i) = $centroids
     #else
             msidata_$i = loadRData('infile_${i}.RData')
+            ## keep compatibility with old .RData files:
+            msidata_$i\$column1 = NULL
+            msidata_$i\$column2 = NULL
+            msidata_$i\$column3 = NULL
+            msidata_$i\$column4 = NULL
+            msidata_$i\$column5 = NULL
+            msidata_$i\$combined_sample = NULL
+
     #end if
 
     ## remove duplicated coordinates, otherwise combine will fail
     print(paste0(sum(duplicated(coord(msidata_$i))), " duplicated coordinates were removed from input file"))
     msidata_${i} <- msidata_${i}[,!duplicated(coord(msidata_${i}))]
 
-    ## same name for MSI data files necessary to combine data in one single coordinate system
+    ## same name for MSI data files necessary to combine data into one single coordinate system
     sampleNames(msidata_$i) = "msidata"
 
-
-    ## read and process annotation tabular or automatically use name of infile as annotation
-
-        ## set all pixel annotations to NA, necessary in case files were combined before with different annotations
-        msidata_$i\$column1 = rep(NA, ncol(msidata_$i))
-        msidata_$i\$column2 = rep(NA, ncol(msidata_$i))
-        msidata_$i\$column3 = rep(NA, ncol(msidata_$i))
-        msidata_$i\$column4 = rep(NA, ncol(msidata_$i))
-        msidata_$i\$column5 = rep(NA, ncol(msidata_$i))
-        msidata_$i\$combined_sample = rep(NA, ncol(msidata_$i))
-
+############ 3) Read and process annotation tabular files ######################
 
     #if str($annotation_cond.annotation_tabular) == 'annotation'
         print("annotations")
 
         ## read annotation tabular, set first two columns as x and y, merge with coordinates dataframe and order according to pixelorder in msidata
         input_annotation = read.delim("annotation_file_${i}.tabular", header = $annotation_cond.tabular_header, stringsAsFactors = FALSE)
+        colnames(input_annotation)[1:2] = c("x", "y")
 
-        colnames(input_annotation)[1:2] = c("x", "y")
         msidata_coordinates = cbind(coord(msidata_$i)[,1:2], 1:ncol(msidata_$i))
         colnames(msidata_coordinates)[3] = "pixel_index"
 
-        ## only first 5 annotation columns are kept
-        if (ncol(input_annotation) > 7){
-            input_annotation = input_annotation[,1:7]}
-
         annotation_df = merge(msidata_coordinates, input_annotation, by=c("x", "y"), all.x=TRUE)
-        annotation_df_8 = cbind(annotation_df, data.frame(matrix(NA,ncol=8-ncol(annotation_df), nrow=ncol(msidata_$i)))) 
-        annotation_df_8_sorted = annotation_df_8[order(annotation_df_8\$pixel_index),]## orders pixel according to msidata
-
-        ## each annotation column is assigned to the pixel in the pData slot of the MSIdata
-        msidata_$i\$column1 = annotation_df_8_sorted[,4]
-        msidata_$i\$column2 = annotation_df_8_sorted[,5]
-        msidata_$i\$column3 = annotation_df_8_sorted[,6]
-        msidata_$i\$column4 = annotation_df_8_sorted[,7]
-        msidata_$i\$column5 = annotation_df_8_sorted[,8]
-
+        annotation_df_sorted = annotation_df[order(annotation_df\$pixel_index),]## orders pixel according to msidata 
+        annotation_df_sorted\$pixel_index = NULL
 
         ## extract columnnames from (last) annotation tabular (for QC plot names)
-        annotation_colnames = colnames(input_annotation)[-c(1,2)]
+        annotation_colnames = colnames(input_annotation)
 
     #end if
 
-
-    ################### preparation xy shifts ##########################
+############### 4) shift coordinates with xy shifts ############################
 
     #if str( $combine_conditional.combine_method ) == 'xy_shifts':
 
+        ## optional: set all files to 1/1 and then add shift
+        #if $combine_conditional.xy_origin:
+            coord(msidata_$i)\$x = as.integer(coord(msidata_$i)\$x - min(coord(msidata_$i)\$x-1))
+            coord(msidata_$i)\$y = as.integer(coord(msidata_$i)\$y - min(coord(msidata_$i)\$y-1))
+        #end if
+
         ## shift coordinates according to input tabular file and store file names
         coord(msidata_$i)\$x = as.integer(coord(msidata_$i)\$x + input_list[$i+1,$combine_conditional.column_x]) ## shifts x coordinates according to tabular file
         coord(msidata_$i)\$y = as.integer(coord(msidata_$i)\$y + input_list[$i+1,$combine_conditional.column_y]) ## shifts y coordinates according to tabular file
-        pixel_vector = append(pixel_vector, rep(paste($i+1, input_list[$i+1,$combine_conditional.column_names], sep="_"),times=ncol(msidata_$i))) ## stores file name for each pixel
-        msidata_$i\$combined_sample = rep(paste($i+1, input_list[$i+1,$combine_conditional.column_names], sep="_"),times=ncol(msidata_$i))
-        ## store number of file
+        sample_name = rep(paste(input_list[$i+1,$combine_conditional.column_names]),times=ncol(msidata_$i)) ## stores file name for each pixel
+        sample_names = append(sample_names, sample_name)
+
+        ## store number of file to use later when removing duplicate coordinates
         pixelcoords_$i = cbind(coord(msidata_$i)[,1:2], rep($i+1,ncol(msidata_$i)))
         #silent $pixelcoords.append('pixelcoords_'+str($i))
         colnames(pixelcoords_$i)[3] = "file_number"
 
-    ################### preparation automatic combination ##########################
+##################### 5) shift coordinates automatically #######################
 
     #elif str( $combine_conditional.combine_method ) == 'automatic_combine':
 
-        ## use name of Galaxy inputfile as combined sample annotation
-        names_vector = character()
+        ## use name of Galaxy inputfile as sample annotation
+        sample_name = character()
         #set escaped_element_identifier = re.sub('[^\w\-\s\[/]]', '_', str($infile.element_identifier))
 
         if (ncol(msidata_$i)>0 & nrow(msidata_$i) >0)
         {
-            if (is.null(levels(msidata_$i\$combined_sample)))
-            {
-            names_vector = append(names_vector, rep(paste($i+1, "$escaped_element_identifier", sep="_"),ncol(msidata_$i)))
-            msidata_$i\$combined_sample = as.factor(names_vector)
-            }
+            sample_name = append(sample_name, rep(paste("$escaped_element_identifier"),ncol(msidata_$i)))
         }
 
         ## Number of input files define grid which is row-wise filled with files
         coord(msidata_$i)\$x = as.integer(coord(msidata_$i)\$x - (min(coord(msidata_$i)\$x-1)) + x_shifts)
         coord(msidata_$i)\$y = as.integer(coord(msidata_$i)\$y - (min(coord(msidata_$i)\$y-1)) + y_shifts)
-        x_shifts = max(coord(msidata_$i)\$x) + 5
+
+        x_shifts = max(coord(msidata_$i)\$x) + $combine_conditional.shift_value
         max_y = append(max_y, max(coord(msidata_$i)\$y))
         all_files = $num_infiles
         new_row = ($i+1)/ceiling(sqrt(all_files))
         new_row%%1==0
         if (new_row%%1==0)
         {x_shifts = 0 ### when row is filled: x values start again at zero
-         y_shifts = max(max_y) + 5 ### when row is filled: y value increases to start a new row
+         y_shifts = max(max_y) + $combine_conditional.shift_value ### when row is filled: y value increases to start a new row
         max_y = numeric()}
 
     #end if
 
+############################# 6) combination of files ##########################
+
+    ## combine shifted coordinates with sample name and annotations from input file
+    ## drop=FALSE keeps a single annotation column as a data frame column, so it retains its original column name
+    #if str($annotation_cond.annotation_tabular) == 'annotation'
+        cardinal_coordinates_$i = data.frame(as.matrix(Cardinal::coord(msidata_$i)[,1:2]), sample_name, annotation_df_sorted[,-c(1,2), drop=FALSE])
+    #else
+        cardinal_coordinates_$i = data.frame(as.matrix(Cardinal::coord(msidata_$i)[,1:2]), sample_name)
+    #end if
+
     ## store files to combine them later and for each file check if it is valid
+    #silent $msidata.append('msidata_'+str($i)) 
+    valid_dataset = append(valid_dataset, ncol(msidata_$i)>0 & nrow(msidata_$i)>0) 
+
+
+######################### 6a) combination different mz axis ####################
+
+    ## combination for files with different mz axis via MALDIquant during for loop
+
+    #if $processed_true: 
+    print("mz axis differ")
 
-    #silent $msidata.append('msidata_'+str($i))
-    valid_dataset = append(valid_dataset, ncol(msidata_$i)>0 & nrow(msidata_$i)>0) ## file with no intensities is considered valid
+        cardinal_mzs_$i = Cardinal::mz(msidata_$i)
+
+        ## Convert every spectrum of this file exactly once into a MALDIquant object.
+        ## seq_len() (instead of 1:ncol()) turns the loops into a no-op for files without pixels.
+        maldi_data_${i} = list()
+
+        if (!centroided(msidata_$i)){
+            ## profile spectra become MassSpectrum objects
+            for (number_spectra in seq_len(ncol(msidata_$i))){
+                maldi_data_${i}[[number_spectra]] = MALDIquant::createMassSpectrum(mass = cardinal_mzs_$i, intensity = Cardinal::iData(msidata_$i)[,number_spectra])
+            }
+        }else{
+            ## centroided spectra become MassPeaks objects; no signal-to-noise values are available, so snr is all NA
+            for (number_spectra in seq_len(ncol(msidata_$i))){
+                maldi_data_${i}[[number_spectra]] = MALDIquant::createMassPeaks(mass = cardinal_mzs_$i, intensity = Cardinal::spectra(msidata_$i)[,number_spectra], snr = rep(NA_real_, nrow(msidata_$i)))
+            }
+        }
+
+
+        msidata_combined = append(msidata_combined, maldi_data_$i)
+
+    #end if
+
+    coordinates_combined = rbind(coordinates_combined, cardinal_coordinates_$i) 
 
 #end for
 
 
-###################### automatic combination ###################################
-################################################################################
+######################### 6b) combination same mz axis ###################
 
 #if str( $combine_conditional.combine_method ) == 'automatic_combine':
-    print("automatic_combine")
-
-    ## combine only valid datasets
+    print("automatic combine")
 
-    valid_data =  list(#echo ','.join($msidata)#)[valid_dataset]
-    msidata_combined = do.call(combine, valid_data)
-    print("Valid datasets in order of input bottom to top:")
-    print(valid_dataset)
+   #if not $processed_true:
+       ## combine only valid datasets
 
-    ## create dataframe with x,y,sample_name and show all pixels in PDF as QC
-    position_df = cbind(coord(msidata_combined)[,1:2], msidata_combined\$combined_sample)
-    colnames(position_df)[3] = "sample_name"
-
-    msidata = msidata_combined
+        valid_data =  list(#echo ','.join($msidata)#)[valid_dataset]
+        msidata = do.call(combine, valid_data)
+        print("Valid datasets in order of input bottom to top:")
+        print(valid_dataset)
+        writeImzML(msidata, "out")
 
-    ## save msidata as imzML file
-    #if str($imzml_output) == "imzml_format":
-        writeImzML(msidata, "out")
-    #elif str($imzml_output) == "rdata_format":
-        ## save as (.RData)
-        iData(msidata) = iData(msidata)[]
-        save(msidata, file="$outfile_rdata")
+    #else
+
+        ## save msidata as imzML file MALDIquant
+        MALDIquantForeign::exportImzMl(msidata_combined, file="out.imzML", processed=TRUE, coordinates=as.matrix(coordinates_combined[,1:2]))
+
     #end if
 
-
-################################## xy shifts ###################################
+########################### xy shift combination ###############################
 ################################################################################
 
 #elif str( $combine_conditional.combine_method ) == 'xy_shifts':
     print("xy_shifts")
 
-    ## in case user made mistake with xy shifts: find duplicated coordinates
-    all_coordinates = do.call(rbind, list(#echo ','.join($pixelcoords)#))
-    duplicated_coordinates= duplicated(all_coordinates[,1:2])| duplicated(all_coordinates[,1:2], fromLast=TRUE)
-print(paste0("Number of removed duplicated coordinates after combination: ", sum(duplicated_coordinates)/2))
-    unique_coordinates = all_coordinates[!duplicated_coordinates,]
+    #if not $processed_true:
+
+        duplicated_coordinates= duplicated(coordinates_combined[,1:2])| duplicated(coordinates_combined[,1:2], fromLast=TRUE)
+        print(paste0("Number of removed duplicated coordinates after combination: ", sum(duplicated_coordinates)))
+        coordinates_combined = coordinates_combined[!duplicated_coordinates,]
 
-    ## remove duplicated coordinates
-    datasetlist = list()
-    count = 1
-    for (usable_dataset in list(#echo ','.join($msidata)#)){
-        pixelsofinterest = pixels(usable_dataset)[names(pixels(usable_dataset)) %in% rownames(unique_coordinates)]
-        filtered_dataset = usable_dataset[,pixelsofinterest]
-        if (ncol(filtered_dataset) > 0 ){
-            datasetlist[[count]] = filtered_dataset}
-        count = count +1}
+        ## remove duplicated coordinates
+        datasetlist = list()
+        count = 1
+        for (usable_dataset in list(#echo ','.join($msidata)#)){
+            pixelsofinterest = pixels(usable_dataset)[names(pixels(usable_dataset)) %in% rownames(coordinates_combined)]
+            filtered_dataset = usable_dataset[,pixelsofinterest]
+            if (ncol(filtered_dataset) > 0 ){
+                datasetlist[[count]] = filtered_dataset}
+            count = count +1}
+
+        msidata = do.call(combine, datasetlist)
+        writeImzML(msidata, "out")
 
-    msidata_combined = do.call(combine, datasetlist)
+    #else
 
-    msidata = msidata_combined
+        ## in case user made mistake with xy shifts: find duplicated coordinates
+        duplicated_coordinates= duplicated(coordinates_combined[,1:2])| duplicated(coordinates_combined[,1:2], fromLast=TRUE)
+        print(paste0("Number of removed duplicated coordinates after combination: ", sum(duplicated_coordinates)))
+        unique_coordinates = data.frame(coordinates_combined)[!duplicated_coordinates,]
+
+        filtered_dataset = msidata_combined [!duplicated_coordinates]
+        coordinates_matrix = as.matrix(unique_coordinates[,1:2])
 
-    ## save msidata as imzML file
-    #if str($imzml_output) == "imzml_format":
-        writeImzML(msidata, "out")
-    #elif str($imzml_output) == "rdata_format":
-        ## save as (.RData)
-        iData(msidata) = iData(msidata)[]
-        save(msidata, file="$outfile_rdata")
+        ## save msidata as imzML file MALDIquant
+        MALDIquantForeign::exportImzMl(filtered_dataset, file="out.imzML", processed=TRUE, coordinates=coordinates_matrix)
+
+        ## create x,y,sample_name dataframe for QC pdf
+       ## position_df = unique_coordinates ### ! different here
+       ## colnames(position_df)[3] = "sample_name"
+        ##coordinates_combined = coordinates_combined[pixelsofinterest,] ###! different here
+        coordinates_combined = unique_coordinates
+## TODO: clean up here - can all of this be removed?!
+
     #end if
 
-    ## create x,y,sample_name dataframe for QC pdf
-
-    position_df = cbind(coord(msidata)[,1:2], msidata\$combined_sample)
-    colnames(position_df)[3] = "sample_name"
-
 #end if
 
-
-################################## outputs ####################################
+################################## outputs #####################################
 ################################################################################
 
 ########### QC with pixels and their annotations ################################
@@ -280,7 +313,7 @@
 pdf("Combined_qc.pdf", width=15, height=15)
 
 ## combined plot
-combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+
+combine_plot = ggplot(coordinates_combined[,1:3], aes(x=x, y=y, fill=sample_name))+
        geom_tile() +
        coord_fixed()+
        ggtitle("Spatial orientation of combined data")+
@@ -288,125 +321,39 @@
        theme(text=element_text(family="ArialMT", face="bold", size=15))+
        theme(legend.position="bottom",legend.direction="vertical")+
        guides(fill=guide_legend(ncol=4,byrow=TRUE))
-coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean)
+coord_labels = aggregate(cbind(x,y)~sample_name, data=coordinates_combined[,1:3], mean)
 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name)
-for(file_count in 1:nrow(coord_labels))
-    {combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],
-    y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))}
+
 print(combine_plot)
 
     #if str($annotation_cond.annotation_tabular) == 'annotation'
         ## annotation plots
-
-        ## plot 1
-        column1_df = cbind(coord(msidata)[,1:2], msidata\$column1)
-        colnames(column1_df)[3] = "column1"
-
-        if (sum(is.na(column1_df[3])) < nrow(column1_df)){
-        column1_plot = ggplot(column1_df, aes(x=x, y=y, fill=column1))+
-               geom_tile() +
-               coord_fixed()+
-               ggtitle(paste0(annotation_colnames[1]))+
-               theme_bw()+
-               theme(text=element_text(family="ArialMT", face="bold", size=15))+
-               theme(legend.position="bottom",legend.direction="vertical")+
-               guides(fill=guide_legend(ncol=4,byrow=TRUE, title=annotation_colnames[1]))
-        print(column1_plot)}
-        ##rename columnname for output tabular file
-        colnames(column1_df)[3] = annotation_colnames[1]
-
-        ## plot 2
-        column2_df = cbind(coord(msidata)[,1:2], msidata\$column2)
-        colnames(column2_df)[3] = "column2"
-
-        if (sum(is.na(column2_df[3])) < nrow(column2_df)){
-        column2_plot = ggplot(column2_df, aes(x=x, y=y, fill=column2))+
-               geom_tile() +
-               coord_fixed()+
-               ggtitle(paste0(annotation_colnames[2]))+
-               theme_bw()+
-               theme(text=element_text(family="ArialMT", face="bold", size=15))+
-               theme(legend.position="bottom",legend.direction="vertical")+
-               guides(fill=guide_legend(ncol=4,byrow=TRUE, title=annotation_colnames[2]))
-        print(column2_plot)}
-
-        ##rename columnname for output tabular file
-        colnames(column2_df)[3] = annotation_colnames[2]
+        for (inputcolumns in 4:ncol(coordinates_combined)){
+                ## plot 1
+                column1_df = coordinates_combined[,c(1,2,inputcolumns)]
+                colnames(column1_df)[3] = "column1"
 
-        ## plot 3
-        column3_df = cbind(coord(msidata)[,1:2], msidata\$column3)
-        colnames(column3_df)[3] = "column3"
-        if (sum(is.na(column3_df[3])) < nrow(column3_df)){
-        column3_plot = ggplot(column3_df, aes(x=x, y=y, fill=column3))+
-               geom_tile() +
-               coord_fixed()+
-               ggtitle(paste0(annotation_colnames[3]))+
-               theme_bw()+
-               theme(text=element_text(family="ArialMT", face="bold", size=15))+
-               theme(legend.position="bottom",legend.direction="vertical")+
-               guides(fill=guide_legend(ncol=4,byrow=TRUE, title=annotation_colnames[3]))
-        print(column3_plot)}
-        ##rename columnname for output tabular file
-        colnames(column3_df)[3] = annotation_colnames[3]
-
-        ## plot 4
-        column4_df = cbind(coord(msidata)[,1:2], msidata\$column4)
-        colnames(column4_df)[3] = "column4"
+                ## plot the annotation only if at least one pixel has a non-NA value; plot title and legend title both use the name of this column
+                if (sum(is.na(column1_df[3])) < nrow(column1_df)){
+                column1_plot = ggplot(column1_df, aes(x=x, y=y, fill=column1))+
+                       geom_tile() +
+                       coord_fixed()+
+                       ggtitle(paste0(annotation_colnames[inputcolumns-1]))+
+                       theme_bw()+
+                       theme(text=element_text(family="ArialMT", face="bold", size=15), legend.position="bottom", legend.direction="vertical")+
+                       guides(fill=guide_legend(ncol=4,byrow=TRUE, title=annotation_colnames[inputcolumns-1]))
+                print(column1_plot)}
+                ##rename columnname for output tabular file
+                colnames(column1_df)[3] = annotation_colnames[inputcolumns-1]
 
-        if (sum(is.na(column4_df[3])) < nrow(column4_df)){
-        column4_plot = ggplot(column4_df, aes(x=x, y=y, fill=column4))+
-               geom_tile() +
-               coord_fixed()+
-               ggtitle(paste0(annotation_colnames[4]))+
-               theme_bw()+
-               theme(text=element_text(family="ArialMT", face="bold", size=15))+
-               theme(legend.position="bottom",legend.direction="vertical")+
-               guides(fill=guide_legend(ncol=4,byrow=TRUE, title=annotation_colnames[4]))
-        print(column4_plot)}
-        ##rename columnname for output tabular file
-        colnames(column4_df)[3] = annotation_colnames[4]
-
-        ## plot5
-
-        column5_df = cbind(coord(msidata)[,1:2], msidata\$column5)
-        colnames(column5_df)[3] = "column5"
-        if (sum(is.na(column5_df[3])) < nrow(column5_df)){
-        column5_plot = ggplot(column5_df, aes(x=x, y=y, fill=column5))+
-               geom_tile() +
-               coord_fixed()+
-               ggtitle(paste0(annotation_colnames[5]))+
-               theme_bw()+
-               theme(text=element_text(family="ArialMT", face="bold", size=15))+
-               theme(legend.position="bottom",legend.direction="vertical")+
-               guides(fill=guide_legend(ncol=4,byrow=TRUE, title=annotation_colnames[5]))
-        print(column5_plot)}
-        ##rename columnname for output tabular file
-        colnames(column5_df)[3] = annotation_colnames[5]
+        }
 
     #end if
     dev.off()
 
 ##################### annotation tabular output ################################
 
-    if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){
-        position_df\$sample_name = gsub("^[^_]*_","",position_df\$sample_name)
-
-        #if str($annotation_cond.annotation_tabular) == 'no_annotation':
-
-            write.table(position_df, file="$pixel_annotations", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
-
-        #else
-            annotation_df_list = list(position_df, column1_df, column2_df, column3_df, column4_df, column5_df)
-            combined_annotations = Reduce(function(...) merge(..., by=c("x", "y"), all=TRUE), annotation_df_list)
-            write.table(combined_annotations, file="$pixel_annotations", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
-        #end if 
-
-    }else{
-        print("No annotation tabular output because file has no features or pixels left")
-    }
-
-
-
+            write.table(coordinates_combined, file="$pixel_annotations", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
 
     ]]></configfile>
     </configfiles>
@@ -438,7 +385,7 @@
             <when value="annotation">
                 <param name="annotation_files" type="data" multiple="true" format="tabular"
                     label="Pixel annotations tabular files"
-                    help="Same number and order of files as input files. First column x values, second column y values. Up to 5 columns with pixel annotations"/>
+                    help="Same number and order of files as input files. First column x values, second column y values, further columns with annotations"/>
                 <param name="tabular_header" type="boolean" label="Tabular files contain a header line" truevalue="TRUE" falsevalue="FALSE"/>
             </when>
         </conditional>
@@ -447,7 +394,9 @@
                 <option value="automatic_combine" selected="True" >automatic combination</option>
                 <option value="xy_shifts">shift xy coordinates with a tabular file</option>
             </param>
-            <when value="automatic_combine"/>
+            <when value="automatic_combine">
+                <param name="shift_value" type="integer" value="5" label="Number of empty pixels that should separate different datasets in x and y dimension"/>
+            </when>
             <when value="xy_shifts">
                 <param name="coordinates_file" type="data" format="tabular" label="Datasetnames, x and y values to shift pixel coordinates before combining"
             help="Tabular file with three columns: 1 for the filename, 1 for the x-coordinate shift and 1 for the y-coordinate shift. Pixels with the same coordinates after shifting will be deleted."/>
@@ -455,21 +404,13 @@
                 <param name="column_y" data_ref="coordinates_file" label="Column with values for shift in y direction" type="data_column"/>
                 <param name="column_names" data_ref="coordinates_file" label="Column with dataset names" type="data_column"/>
                 <param name="xy_header" type="boolean" label="Tabular files contain a header line" truevalue="TRUE" falsevalue="FALSE"/>
+                <param name="xy_origin" type="boolean" label="Set all file coordinates to 1/1 as origin" truevalue="TRUE" falsevalue="FALSE" help="Yes: all file coordinates are shifted in order to have at least one pixel with x = 1 and one with y = 1; then coordinates shifts are added. No: the coordinate shifts are added to the current coordinates of the file"/>
             </when>
         </conditional>
-        <param name="imzml_output" type="select" display = "radio" optional = "False"
-               label="Output format" help= "Choose the output format">
-                <option value="imzml_format">imzML</option>
-                <option value="rdata_format" selected="True" >RData</option>
-        </param>
+        <param name="processed_true" type="boolean" label="Do the files have different m/z axis?" help="If all m/z values of all files are exactly the same choose 'No'. If not sure choose 'Yes'." truevalue="TRUE" falsevalue="FALSE"/>
     </inputs>
     <outputs>
-        <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML">
-            <filter>imzml_output=='imzml_format'</filter>
-        </data>
-        <data format="rdata" name="outfile_rdata" label="${tool.name} on ${on_string}: RData">
-            <filter>imzml_output == 'rdata_format'</filter>
-       </data>
+        <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/>
         <data format="pdf" name="QC_overview" from_work_dir="Combined_qc.pdf" label = "${tool.name} on ${on_string}: QC"/>
         <data format="tabular" name="pixel_annotations" label="${tool.name} on ${on_string}: annotations"/>
     </outputs>
@@ -486,10 +427,29 @@
             <param name="column_x" value="1"/>
             <param name="column_y" value="2"/>
             <param name="column_names" value="3"/>
-            <param name="imzml_output" value="rdata_format"/>
+            <param name="processed_true" value="FALSE"/>
             <output name="pixel_annotations" file="123_annotation_output.tabular"/>
-            <output name="outfile_rdata" file="123_combined.RData" compare="sim_size" />
             <output name="QC_overview" file="123_combined_QC.pdf" compare="sim_size"/>
+            <output name="outfile_imzml" ftype="imzml" file="123_combined.imzml.txt" compare="sim_size">
+                <extra_files type="file" file="123_combined.imzml" name="imzml" lines_diff="4"/>
+                <extra_files type="file" file="123_combined.ibd" name="ibd" compare="sim_size"/>
+            </output>
+        </test>
+        <test>
+            <param name="infiles" value="123_combined_picked.rdata,123_combined_picked2.rdata" ftype="rdata"/>
+            <param name="centroids" value="TRUE"/>
+            <param name="combine_method" value="xy_shifts"/>
+            <param name="coordinates_file" ftype="tabular" value="xy_coordinates2.tabular"/>
+            <param name="column_x" value="1"/>
+            <param name="column_y" value="2"/>
+            <param name="column_names" value="3"/>
+            <param name="processed_true" value="TRUE"/>
+            <output name="pixel_annotations" file="picked.tabular"/>
+            <output name="QC_overview" file="picked_QC.pdf" compare="sim_size"/>
+            <output name="outfile_imzml" ftype="imzml" file="picked.imzml.txt" compare="sim_size">
+                <extra_files type="file" file="picked.imzml" name="imzml" lines_diff="6"/>
+                <extra_files type="file" file="picked.ibd" name="ibd" compare="sim_size"/>
+            </output>
         </test>
         <test>
             <param name="infiles" value="msidata_1.RData,msidata_2.RData,msidata_3.RData" ftype="rdata"/>
@@ -497,7 +457,7 @@
                 <param name="annotation_tabular" value="no_annotation"/>
             </conditional>
             <param name="combine_method" value="automatic_combine"/>
-            <param name="imzml_output" value="imzml_format"/>
+            <param name="processed_true" value="FALSE"/>
             <output name="QC_overview" file="123_combined_auto.pdf" compare="sim_size"/>
             <output name="pixel_annotations" file="123_combined_auto.tabular"/>
             <output name="outfile_imzml" ftype="imzml" file="123_combined_auto.imzml.txt" compare="sim_size">
@@ -513,10 +473,13 @@
                 <param name="tabular_header" value="TRUE"/>
             </conditional>
             <param name="combine_method" value="automatic_combine"/>
-            <param name="imzml_output" value="rdata_format"/>
+            <param name="processed_true" value="FALSE"/>
             <output name="pixel_annotations" file="12_annotation_output.tabular"/>
-            <output name="outfile_rdata" file="12_combined.RData" compare="sim_size" />
             <output name="QC_overview" file="12_combined_QC.pdf" compare="sim_size"/>
+            <output name="outfile_imzml" ftype="imzml" file="12_combined.imzml.txt" compare="sim_size">
+                <extra_files type="file" file="12_combined.imzml" name="imzml" lines_diff="4"/>
+                <extra_files type="file" file="12_combined.ibd" name="ibd" compare="sim_size"/>
+            </output>
         </test>
         <test>
             <param name="infiles" value="msidata_1.RData,123_combined.RData" ftype="rdata"/>
@@ -526,7 +489,7 @@
                 <param name="tabular_header" value="TRUE"/>
             </conditional>
             <param name="combine_method" value="automatic_combine"/>
-            <param name="imzml_output" value="imzml_format"/>
+            <param name="processed_true" value="FALSE"/>
             <output name="pixel_annotations" file="112_annotation_output.tabular"/>
             <output name="QC_overview" file="112_auto_combined_QC.pdf" compare="sim_size"/>
             <output name="outfile_imzml" ftype="imzml" file="112_auto_combined.imzml.txt" compare="sim_size">
@@ -540,10 +503,13 @@
                 <param name="annotation_tabular" value="no_annotation"/>
             </conditional>
             <param name="combine_method" value="automatic_combine"/>
-            <param name="imzml_output" value="rdata_format"/>
+            <param name="processed_true" value="FALSE"/>
             <output name="pixel_annotations" file="2123_annotation_output.tabular"/>
-            <output name="outfile_rdata" file="2123_auto_combined.RData" compare="sim_size" />
             <output name="QC_overview" file="2123_auto_combined_QC.pdf" compare="sim_size"/>
+            <output name="outfile_imzml" ftype="imzml" file="2123_auto_combined.imzml.txt" compare="sim_size">
+                <extra_files type="file" file="2123_auto_combined.imzml" name="imzml" lines_diff="4"/>
+                <extra_files type="file" file="2123_auto_combined.ibd" name="ibd" compare="sim_size"/>
+            </output>
         </test>
     </tests>
     <help>
@@ -553,17 +519,16 @@
 
 -----
 
-This tool uses the Cardinal combine function to combine several mass spectrometry imaging data. 
+This tool combines several mass spectrometry imaging data files.
 
 @MSIDATA_INPUT_DESCRIPTION@
-- MSI data files must have the same m/z values (to obtain same m/z values for different files: filtering tool same m/z range and preprocessing tool same binning width)
 - Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates which will be automatically removed before the tools analysis starts. 
 @SPECTRA_TABULAR_INPUT_DESCRIPTION@
 
 - For xy shifts with tabular file: Tabular file with x and y coordinates shift and file name
 
-    - Each input file is shifted in x and y direction according to this tabular file. In the example the files have about the same pixel dimensions which is smaller than 510x260.
-    - The file can have any column names as header (in this case set "Tabular file contains a header line" to yes) or no header at all
+    - Each input file is renamed and shifted in x and y direction according to this tabular file. In the example the files have about the same pixel dimensions which is smaller than 510x260.
+    - The file can have any column names as header (in this case set "Tabular file contains a header line" to "Yes") or no header at all (set "Tabular file contains a header line" to "No").
 
         ::
         
@@ -580,24 +545,28 @@
 
 **Options**
 
-- "automatic combination": files are automatically arranged in a grid (duplicated pixels are allowed), subfiles are named according to the input file name
+- "automatic combination": files are automatically arranged in a grid, subfiles are named according to the input file name
 - "xy shifts": each file can be moved in x and y direction according to the users need (define one tabular file in the order in which the files are loaded in the history (bottom to top) and define for each file the x and y coordinates shifts in separate columns and the file name in a third column as shown above). The xy shift option combines all datasets and removes all duplicated pixels (same x and y coordinates).
+- "Set all file coordinates to 1/1 as origin" can be chosen to override the current pixel coordinates of the input files and set their minimal x and minimal y values to 1. Then the shifts from the xy shift tabular file are used to move the pixels of each dataset.
+- In case the input files do not have exactly the same m/z values, set "Do the files have different m/z axis?" to "Yes". Then functionalities of the "MALDIquant" package are used to combine the datasets.
+
 
 **Tips**
 
-- The combine tools puts all samples into a common x-y-grid, therefore pixel coordinates will change. In case the pixels are already annotated, the annotations should be provided as tabular files and the tool will return an annotation file with the new pixel coordinates. This annotation file can then be used together with the combined MSI data for tools in which the annotation is required (e.g. 'MSI classification') or useful (e.g. 'MSI spectra plots').
-- In case more annotations are required: The annotation input file should have an identifier column, for example the patient_ID. A second tabular file that contains more annotations and also one column with the identifier column (e.g. 'patient_ID') can be merged to the annotation output file of this tool with the tool 'join two files' and then set the 'Column to use' parameters for both files to the identifier column. 
-
+- The combine tool puts all samples into a common x-y-grid, therefore pixel coordinates will change. In case the pixels are already annotated, the annotations should be provided as tabular files and the tool will return an annotation file with the new pixel coordinates. This annotation file can then be used together with the combined MSI data for tools in which the annotation is required (e.g. 'MSI classification') or useful (e.g. 'MSI spectra plots').
 
 
 **Output**
 
-- MSI data as imzML file or .RData (can be read with the Cardinal package in R)
-- pdf that shows the pixel positions and annotations of the combined files
-- Tabular file with pixel annotations (x,y,column with input file names and up to five annotation columns)
+- MSI data as imzML file (in continuous format when the m/z axes were the same; in processed format when the m/z axes were different)
+- PDF with pixel positions and annotations of the combined files
+- Tabular file with pixel annotations (x,y,column with input file names and annotation columns)
 
 
 ]]>
     </help>
-    <expand macro="citations"/>
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/btv146</citation>
+            <citation type="doi">10.1007/978-3-319-45809-0_6</citation>
+        </citations>
 </tool>