# HG changeset patch # User galaxyp # Date 1536082977 14400 # Node ID c6564ddf0744eea2da6938b988d62e983fe573fb # Parent 19d8eee15959efa83a03fd9c2179aefb5658e517 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_combine commit e87eea03505ab1ba067e192bbbcdc6197dc4b42e diff -r 19d8eee15959 -r c6564ddf0744 msi_combine.xml --- a/msi_combine.xml Wed Aug 22 13:41:16 2018 -0400 +++ b/msi_combine.xml Tue Sep 04 13:42:57 2018 -0400 @@ -1,4 +1,4 @@ - + combine several mass spectrometry imaging datasets into one @@ -20,9 +20,11 @@ ln -s '$infile' infile_${i}.RData && #end if #end for - #for $i, $annotation_file in enumerate($annotation_files): - ln -s '$annotation_file' annotation_file_${i}.tabular && - #end for + #if $annotation_cond.annotation_tabular == 'annotation' + #for $i, $annotation_file in enumerate($annotation_cond.annotation_files): + ln -s '$annotation_file' annotation_file_${i}.tabular && + #end for + #end if cat '${msi_combine}' && Rscript '${msi_combine}' @@ -40,22 +42,18 @@ library(ggplot2) ## read tabular file for xy_shift option - #if str( $combine_conditional.combine_method ) == 'xy_shifts': input_list = read.delim("$combine_conditional.coordinates_file", header = FALSE, stringsAsFactors = FALSE) #end if -## load RData and store with new variable name - +## function to load RData and store with new variable name loadRData <- function(fileName){ -#loads an RData file, and returns it load(fileName) get(ls()[ls() != "fileName"]) } ## preparations for reading files one by one with for loop - pixel_vector = numeric() x_shifts = 0 y_shifts = 0 @@ -71,7 +69,7 @@ #for $i, $infile in enumerate($infiles): - ## read MSI data + ## read and manipulate MSI data #if $infile.ext == 'imzml' #if str($processed_cond.processed_file) == "processed": @@ -85,40 +83,64 @@ msidata_$i = loadRData('infile_${i}.RData') #end if - - ## read annotation data, up to 5 annotations can be used for now - - ## read annotation tabular, set first two columns as x and y, merge with coordinates dataframe and order according to pixelorder in msidata - input_annotation = read.delim("annotation_file_${i}.tabular", header = TRUE, - stringsAsFactors = FALSE) - - if (class(input_annotation[,1]) == "character"){ - annotation_coordinates = matrix(unlist(strsplit(as.character(input_annotation[,1]), "\\,")), ncol=2, byrow=TRUE) - annotation_coordinates2 = cbind(as.numeric(substring(annotation_coordinates[,1], 5, last = 1000000L)), as.numeric(substring(annotation_coordinates[,2], 5, last = 1000000L))) - input_annotation = cbind(annotation_coordinates2, input_annotation[,-1]) - } - - colnames(input_annotation)[1:2] = c("x", "y") - msidata_coordinates = cbind(coord(msidata_$i)[,1:2], 1:ncol(msidata_$i)) - colnames(msidata_coordinates)[3] = "pixel_index" - ## only first 5 annotation columns are kept - if (ncol(input_annotation) > 7){ - input_annotation = input_annotation[,1:7]} - - annotation_df = merge(msidata_coordinates, input_annotation, by=c("x", "y"), all.x=TRUE) - annotation_df_8 = cbind(annotation_df, data.frame(matrix(NA,ncol=8-ncol(annotation_df), nrow=ncol(msidata_$i)))) - annotation_df_8_sorted = annotation_df_8[order(annotation_df_8\$pixel_index),]## orders pixel according to msidata - - ## each annotation column is assigned to the pixel in the pData slot of the MSIdata - msidata_$i\$column1 = annotation_df_8_sorted[,4] - msidata_$i\$column2 = annotation_df_8_sorted[,5] - msidata_$i\$column3 = annotation_df_8_sorted[,6] - msidata_$i\$column4 = annotation_df_8_sorted[,7] - msidata_$i\$column5 = annotation_df_8_sorted[,8] + ## remove duplicated coordinates, otherwise combine will fail + print(paste0(sum(duplicated(coord(msidata_$i))), " duplicated coordinates were removed from input file")) + msidata_${i} <- msidata_${i}[,!duplicated(coord(msidata_${i}))] ## same name for MSI data files necessary to combine data in one single coordinate system sampleNames(msidata_$i) = "msidata" + + ## read and process annotation tabular or automatically use name of infile as annotation + + ## set all pixel annotations to NA, necessary in case files were combined before with different annotations + msidata_$i\$column1 = rep(NA, ncol(msidata_$i)) + msidata_$i\$column2 = rep(NA, ncol(msidata_$i)) + msidata_$i\$column3 = rep(NA, ncol(msidata_$i)) + msidata_$i\$column4 = rep(NA, ncol(msidata_$i)) + msidata_$i\$column5 = rep(NA, ncol(msidata_$i)) + msidata_$i\$combined_sample = rep(NA, ncol(msidata_$i)) + + #if str($annotation_cond.annotation_tabular) == 'annotation' + print("annotations") + + ## read annotation tabular, set first two columns as x and y, merge with coordinates dataframe and order according to pixelorder in msidata + input_annotation = read.delim("annotation_file_${i}.tabular", header = $annotation_cond.tabular_header, stringsAsFactors = FALSE) + + ## change format x = 1, y = 2 into two columns + if (class(input_annotation[,1]) == "character"){ + annotation_names = colnames(input_annotation[2:ncol(input_annotation)]) + annotation_coordinates = matrix(unlist(strsplit(as.character(input_annotation[,1]), "\\,")), ncol=2, byrow=TRUE) + annotation_coordinates2 = cbind(as.numeric(substring(annotation_coordinates[,1], 5, last = 1000000L)), as.numeric(substring(annotation_coordinates[,2], 5, last = 1000000L))) + input_annotation = cbind(annotation_coordinates2, input_annotation[,-1]) + colnames(input_annotation) = c("x", "y", annotation_names) + } + + colnames(input_annotation)[1:2] = c("x", "y") + msidata_coordinates = cbind(coord(msidata_$i)[,1:2], 1:ncol(msidata_$i)) + colnames(msidata_coordinates)[3] = "pixel_index" + + ## only first 5 annotation columns are kept + if (ncol(input_annotation) > 7){ + input_annotation = input_annotation[,1:7]} + + annotation_df = merge(msidata_coordinates, input_annotation, by=c("x", "y"), all.x=TRUE) + annotation_df_8 = cbind(annotation_df, data.frame(matrix(NA,ncol=8-ncol(annotation_df), nrow=ncol(msidata_$i)))) + annotation_df_8_sorted = annotation_df_8[order(annotation_df_8\$pixel_index),]## orders pixel according to msidata + + ## each annotation column is assigned to the pixel in the pData slot of the MSIdata + msidata_$i\$column1 = annotation_df_8_sorted[,4] + msidata_$i\$column2 = annotation_df_8_sorted[,5] + msidata_$i\$column3 = annotation_df_8_sorted[,6] + msidata_$i\$column4 = annotation_df_8_sorted[,7] + msidata_$i\$column5 = annotation_df_8_sorted[,8] + + ## extract columnnames from (last) annotation tabular (for QC plot names) + annotation_colnames = colnames(input_annotation)[-c(1,2)] + + #end if + + ################### preparation xy shifts ########################## #if str( $combine_conditional.combine_method ) == 'xy_shifts': @@ -135,8 +157,10 @@ ################### preparation automatic combination ########################## #elif str( $combine_conditional.combine_method ) == 'automatic_combine': + + ## use name of Galaxy inputfile as combined sample annotation names_vector = character() - #set escaped_element_identifier = re.sub('[^\w\-\s\[/]]', '_', str($infile.element_identifier)) ## use name of inputfile from Galaxy + #set escaped_element_identifier = re.sub('[^\w\-\s\[/]]', '_', str($infile.element_identifier)) if (sum(spectra(msidata_$i)[],na.rm=TRUE)>0) ## use only valid files { if (is.null(levels(msidata_$i\$combined_sample))) @@ -170,8 +194,6 @@ #end for -## extract columnnames from (last) annotation tabular (for QC plot names) -annotation_colnames = colnames(input_annotation)[-c(1,2)] ###################### automatic combination ################################### ################################################################################ @@ -212,17 +234,16 @@ msidata = msidata_combined save(msidata, file="$msidata_combined") - ################################## xy shifts ################################### ################################################################################ #elif str( $combine_conditional.combine_method ) == 'xy_shifts': print("xy_shifts") - ## find duplicated coordinates + ## in case user made mistake with xy shifts: find duplicated coordinates all_coordinates = do.call(rbind, list(#echo ','.join($pixelcoords)#)) duplicated_coordinates= duplicated(all_coordinates[,1:2])| duplicated(all_coordinates[,1:2], fromLast=TRUE) -print(paste0("Number of removed duplicated coordinates: ", sum(duplicated_coordinates)/2)) +print(paste0("Number of removed duplicated coordinates after combination: ", sum(duplicated_coordinates)/2)) unique_coordinates = all_coordinates[!duplicated_coordinates,] ## remove duplicated coordinates @@ -269,106 +290,117 @@ coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean) coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name) for(file_count in 1:nrow(coord_labels)) -{combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"], -y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))} + {combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"], + y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))} print(combine_plot) + #if str($annotation_cond.annotation_tabular) == 'annotation' - ## annotation plots + ## annotation plots - ## plot 1 + ## plot 1 - column1_df = cbind(coord(msidata), msidata\$column1) - colnames(column1_df)[3] = "column1" + column1_df = cbind(coord(msidata), msidata\$column1) + colnames(column1_df)[3] = "column1" - if (sum(is.na(column1_df[3])) < nrow(column1_df)){ - column1_plot = ggplot(column1_df, aes(x=x, y=y, fill=column1))+ - geom_tile() + - coord_fixed()+ - ggtitle(paste0(annotation_colnames[1]))+ - theme_bw()+ - theme(text=element_text(family="ArialMT", face="bold", size=15))+ - theme(legend.position="bottom",legend.direction="vertical")+ - guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[1])) - print(column1_plot)} - ##rename columnname for output tabular file - colnames(column1_df)[3] = annotation_colnames[1] + if (sum(is.na(column1_df[3])) < nrow(column1_df)){ + column1_plot = ggplot(column1_df, aes(x=x, y=y, fill=column1))+ + geom_tile() + + coord_fixed()+ + ggtitle(paste0(annotation_colnames[1]))+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[1])) + print(column1_plot)} + ##rename columnname for output tabular file + colnames(column1_df)[3] = annotation_colnames[1] - ## plot 2 - column2_df = cbind(coord(msidata), msidata\$column2) - colnames(column2_df)[3] = "column2" + ## plot 2 + column2_df = cbind(coord(msidata), msidata\$column2) + colnames(column2_df)[3] = "column2" - if (sum(is.na(column2_df[3])) < nrow(column2_df)){ - column2_plot = ggplot(column2_df, aes(x=x, y=y, fill=column2))+ - geom_tile() + - coord_fixed()+ - ggtitle(paste0(annotation_colnames[2]))+ - theme_bw()+ - theme(text=element_text(family="ArialMT", face="bold", size=15))+ - theme(legend.position="bottom",legend.direction="vertical")+ - guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[2])) - print(column2_plot)} - ##rename columnname for output tabular file - colnames(column2_df)[3] = annotation_colnames[2] + if (sum(is.na(column2_df[3])) < nrow(column2_df)){ + column2_plot = ggplot(column2_df, aes(x=x, y=y, fill=column2))+ + geom_tile() + + coord_fixed()+ + ggtitle(paste0(annotation_colnames[2]))+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[2])) + print(column2_plot)} + ##rename columnname for output tabular file + colnames(column2_df)[3] = annotation_colnames[2] - ## plot 3 - column3_df = cbind(coord(msidata), msidata\$column3) - colnames(column3_df)[3] = "column3" - if (sum(is.na(column3_df[3])) < nrow(column3_df)){ - column3_plot = ggplot(column3_df, aes(x=x, y=y, fill=column3))+ - geom_tile() + - coord_fixed()+ - ggtitle(paste0(annotation_colnames[3]))+ - theme_bw()+ - theme(text=element_text(family="ArialMT", face="bold", size=15))+ - theme(legend.position="bottom",legend.direction="vertical")+ - guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[3])) - print(column3_plot)} - ##rename columnname for output tabular file - colnames(column3_df)[3] = annotation_colnames[3] + ## plot 3 + column3_df = cbind(coord(msidata), msidata\$column3) + colnames(column3_df)[3] = "column3" + if (sum(is.na(column3_df[3])) < nrow(column3_df)){ + column3_plot = ggplot(column3_df, aes(x=x, y=y, fill=column3))+ + geom_tile() + + coord_fixed()+ + ggtitle(paste0(annotation_colnames[3]))+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[3])) + print(column3_plot)} + ##rename columnname for output tabular file + colnames(column3_df)[3] = annotation_colnames[3] - ## plot 4 - column4_df = cbind(coord(msidata), msidata\$column4) - colnames(column4_df)[3] = "column4" + ## plot 4 + column4_df = cbind(coord(msidata), msidata\$column4) + colnames(column4_df)[3] = "column4" - if (sum(is.na(column4_df[3])) < nrow(column4_df)){ - column4_plot = ggplot(column4_df, aes(x=x, y=y, fill=column4))+ - geom_tile() + - coord_fixed()+ - ggtitle(paste0(annotation_colnames[4]))+ - theme_bw()+ - theme(text=element_text(family="ArialMT", face="bold", size=15))+ - theme(legend.position="bottom",legend.direction="vertical")+ - guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[4])) - print(column4_plot)} - ##rename columnname for output tabular file - colnames(column4_df)[3] = annotation_colnames[4] + if (sum(is.na(column4_df[3])) < nrow(column4_df)){ + column4_plot = ggplot(column4_df, aes(x=x, y=y, fill=column4))+ + geom_tile() + + coord_fixed()+ + ggtitle(paste0(annotation_colnames[4]))+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[4])) + print(column4_plot)} + ##rename columnname for output tabular file + colnames(column4_df)[3] = annotation_colnames[4] - ## plot5 + ## plot5 - column5_df = cbind(coord(msidata), msidata\$column5) - colnames(column5_df)[3] = "column5" - if (sum(is.na(column5_df[3])) < nrow(column5_df)){ - column5_plot = ggplot(column5_df, aes(x=x, y=y, fill=column5))+ - geom_tile() + - coord_fixed()+ - ggtitle(paste0(annotation_colnames[5]))+ - theme_bw()+ - theme(text=element_text(family="ArialMT", face="bold", size=15))+ - theme(legend.position="bottom",legend.direction="vertical")+ - guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[5])) - print(column5_plot)} - ##rename columnname for output tabular file - colnames(column5_df)[3] = annotation_colnames[5] + column5_df = cbind(coord(msidata), msidata\$column5) + colnames(column5_df)[3] = "column5" + if (sum(is.na(column5_df[3])) < nrow(column5_df)){ + column5_plot = ggplot(column5_df, aes(x=x, y=y, fill=column5))+ + geom_tile() + + coord_fixed()+ + ggtitle(paste0(annotation_colnames[5]))+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[5])) + print(column5_plot)} + ##rename columnname for output tabular file + colnames(column5_df)[3] = annotation_colnames[5] + + #end if dev.off() ##################### annotation tabular output ################################ if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){ - annotation_df_list = list(position_df, column1_df, column2_df, column3_df, column4_df, column5_df) - combined_annotations = Reduce(function(...) merge(..., by=c("x", "y"), all=TRUE), annotation_df_list) - write.table(combined_annotations, file="$annotation_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") + + #if str($annotation_cond.annotation_tabular) == 'no_annotation': + write.table(position_df, file="$pixel_annotations", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") + + #else + annotation_df_list = list(position_df, column1_df, column2_df, column3_df, column4_df, column5_df) + combined_annotations = Reduce(function(...) merge(..., by=c("x", "y"), all=TRUE), annotation_df_list) + write.table(combined_annotations, file="$pixel_annotations", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") + + #end if + }else{ print("No annotation tabular output because file has no features or pixels left") } @@ -380,8 +412,8 @@ if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){ spectramatrix = spectra(msidata)[] spectramatrix = cbind(mz(msidata),spectramatrix) - newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix) - write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") + newmatrix = rbind(c("mz", names(pixels(msidata))), spectramatrix) + write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") }else{ print("No intensity matrix output because file has no features or pixels left") } @@ -408,11 +440,21 @@ - + + + + + + + + + + + - + @@ -429,45 +471,68 @@ - - - + + + output_matrix - + + + + + - - + + - + - + + + + + - - + + - + - + + + + + - + - + + + + + + + + + + + + diff -r 19d8eee15959 -r c6564ddf0744 test-data/112_auto_combined_QC.pdf Binary file test-data/112_auto_combined_QC.pdf has changed diff -r 19d8eee15959 -r c6564ddf0744 test-data/123_combined_QC.pdf Binary file test-data/123_combined_QC.pdf has changed diff -r 19d8eee15959 -r c6564ddf0744 test-data/123_combined_matrix.tabular --- a/test-data/123_combined_matrix.tabular Wed Aug 22 13:41:16 2018 -0400 +++ b/test-data/123_combined_matrix.tabular Tue Sep 04 13:42:57 2018 -0400 @@ -1,4 +1,4 @@ -mz | spectra x = 1, y = 1 x = 1, y = 2 x = 1, y = 3 x = 3, y = 1 x = 4, y = 1 x = 3, y = 2 x = 4, y = 2 x = 3, y = 3 x = 4, y = 3 x = 9, y = 1 x = 9, y = 2 x = 9, y = 3 +mz x = 1, y = 1 x = 1, y = 2 x = 1, y = 3 x = 3, y = 1 x = 4, y = 1 x = 3, y = 2 x = 4, y = 2 x = 3, y = 3 x = 4, y = 3 x = 9, y = 1 x = 9, y = 2 x = 9, y = 3 100.083335876465 0 0 0 0 0 0 0 0 0 0 0 0 100.166664123535 0 0 0 0 0 0 0 0 0 0 0 0 100.25 0 0 0 0 0 0 0 0 0 0 0 0 diff -r 19d8eee15959 -r c6564ddf0744 test-data/12_combined_QC.pdf Binary file test-data/12_combined_QC.pdf has changed diff -r 19d8eee15959 -r c6564ddf0744 test-data/12_combined_matrix.tabular --- a/test-data/12_combined_matrix.tabular Wed Aug 22 13:41:16 2018 -0400 +++ b/test-data/12_combined_matrix.tabular Tue Sep 04 13:42:57 2018 -0400 @@ -1,4 +1,4 @@ -mz | spectra x = 1, y = 1 x = 1, y = 2 x = 1, y = 3 x = 7, y = 1 x = 8, y = 1 x = 7, y = 2 x = 8, y = 2 x = 7, y = 3 x = 8, y = 3 +mz x = 1, y = 1 x = 1, y = 2 x = 1, y = 3 x = 7, y = 1 x = 8, y = 1 x = 7, y = 2 x = 8, y = 2 x = 7, y = 3 x = 8, y = 3 100.083335876465 0 0 0 0 0 0 0 0 0 100.166664123535 0 0 0 0 0 0 0 0 0 100.25 0 0 0 0 0 0 0 0 0 diff -r 19d8eee15959 -r c6564ddf0744 test-data/2123_annotation_output.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2123_annotation_output.tabular Tue Sep 04 13:42:57 2018 -0400 @@ -0,0 +1,19 @@ +x y sample_name +1 1 1_msidata_2.RData +2 1 1_msidata_2.RData +1 2 1_msidata_2.RData +2 2 1_msidata_2.RData +1 3 1_msidata_2.RData +2 3 1_msidata_2.RData +8 1 2_123_combined.RData +8 2 2_123_combined.RData +8 3 2_123_combined.RData +10 1 2_123_combined.RData +11 1 2_123_combined.RData +10 2 2_123_combined.RData +11 2 2_123_combined.RData +10 3 2_123_combined.RData +11 3 2_123_combined.RData +16 1 2_123_combined.RData +16 2 2_123_combined.RData +16 3 2_123_combined.RData diff -r 19d8eee15959 -r c6564ddf0744 test-data/2123_auto_combined.RData Binary file test-data/2123_auto_combined.RData has changed diff -r 19d8eee15959 -r c6564ddf0744 test-data/2123_auto_combined_QC.pdf Binary file test-data/2123_auto_combined_QC.pdf has changed