Mercurial > repos > galaxyp > msi_combine
diff msi_combine.xml @ 6:f4aafc565aa3 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_combine commit 5bceedc3a11c950790692a4c64bbb83d46897bee
author | galaxyp |
---|---|
date | Tue, 24 Jul 2018 04:52:39 -0400 |
parents | ff91e78b5c5c |
children | 19d8eee15959 |
line wrap: on
line diff
--- a/msi_combine.xml Fri Jul 06 14:13:08 2018 -0400 +++ b/msi_combine.xml Tue Jul 24 04:52:39 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="mass_spectrometry_imaging_combine" name="MSI combine" version="1.10.0.3"> +<tool id="mass_spectrometry_imaging_combine" name="MSI combine" version="1.10.0.4"> <description> combine several mass spectrometry imaging datasets into one </description> @@ -20,6 +20,10 @@ ln -s '$infile' infile_${i}.RData && #end if #end for + #for $i, $annotation_file in enumerate($annotation_files): + ln -s '$annotation_file' annotation_file_${i}.tabular && + #end for + cat '${msi_combine}' && Rscript '${msi_combine}' @@ -67,25 +71,52 @@ #for $i, $infile in enumerate($infiles): -#if $infile.ext == 'imzml' - #if str($processed_cond.processed_file) == "processed": - msidata_$i <- readImzML('infile_${i}', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units") + ## read MSI data + + #if $infile.ext == 'imzml' + #if str($processed_cond.processed_file) == "processed": + msidata_$i <- readImzML('infile_${i}', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units") + #else + msidata_$i <- readImzML('infile_${i}') + #end if + #elif $infile.ext == 'analyze75' + msidata_$i <- readAnalyze('infile_${i}') #else - msidata <- readImzML('infile') + msidata_$i = loadRData('infile_${i}.RData') #end if -#elif $infile.ext == 'analyze75' - msidata_$i <- readAnalyze('infile_${i}') -#else - msidata_$i = loadRData('infile_${i}.RData') -#end if - sampleNames(msidata_$i) = "msidata" ## same name necessary to combine data in one single coordinate system + ## read annotation data, up to 5 annotations can be used for now + + ## read annotation tabular, set first two columns as x and y, merge with coordinates dataframe and order according to pixelorder in msidata + input_annotation = read.delim("annotation_file_${i}.tabular", header = TRUE, + stringsAsFactors = FALSE) + colnames(input_annotation)[1:2] = c("x", "y") + msidata_coordinates = cbind(coord(msidata_$i)[,1:2], 1:ncol(msidata_$i)) + colnames(msidata_coordinates)[3] = "pixel_index" + ## only first 5 annotation columns are kept + if (ncol(input_annotation) > 7){ + input_annotation = input_annotation[,1:7]} + + annotation_df = merge(msidata_coordinates, input_annotation, by=c("x", "y"), all.x=TRUE) + annotation_df_8 = cbind(annotation_df, data.frame(matrix(NA,ncol=8-ncol(annotation_df), nrow=ncol(msidata_$i)))) + annotation_df_8_sorted = annotation_df_8[order(annotation_df_8\$pixel_index),]## orders pixel according to msidata + + ## each annotation column is assigned to the pixel in the pData slot of the MSIdata + msidata_$i\$column1 = annotation_df_8_sorted[,4] + msidata_$i\$column2 = annotation_df_8_sorted[,5] + msidata_$i\$column3 = annotation_df_8_sorted[,6] + msidata_$i\$column4 = annotation_df_8_sorted[,7] + msidata_$i\$column5 = annotation_df_8_sorted[,8] + + ## same name for MSI data files necessary to combine data in one single coordinate system + sampleNames(msidata_$i) = "msidata" ################### preparation xy shifts ########################## #if str( $combine_conditional.combine_method ) == 'xy_shifts': + ## shift coordinates according to input tabular file and store file names coord(msidata_$i)\$x = coord(msidata_$i)\$x + input_list[$i+1,$combine_conditional.column_x] ## shifts x coordinates according to tabular file coord(msidata_$i)\$y = coord(msidata_$i)\$y + input_list[$i+1,$combine_conditional.column_y] ## shifts y coordinates according to tabular file pixel_vector = append(pixel_vector, rep(paste($i+1, input_list[$i+1,$combine_conditional.column_names], sep="_"),times=ncol(msidata_$i))) ## stores file name for each pixel @@ -132,6 +163,8 @@ #end for +## extract columnnames from (last) annotation tabular (for QC plot names) +annotation_colnames = colnames(input_annotation)[-c(1,2)] ###################### automatic combination ################################### ################################################################################ @@ -154,7 +187,7 @@ combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+ geom_tile() + coord_fixed()+ - ggtitle("Spatial orientation of combined data")+ + ggtitle("Spatial orientation of combined data (sample names)")+ theme_bw()+ theme(text=element_text(family="ArialMT", face="bold", size=15))+ theme(legend.position="bottom",legend.direction="vertical")+ @@ -179,82 +212,174 @@ #elif str( $combine_conditional.combine_method ) == 'xy_shifts': print("xy_shifts") - #if str($combine_conditional.combination_true) == "yes_combi": - print("combination with xy shifts") - - ## find duplicated coordinates - all_coordinates = do.call(rbind, list(#echo ','.join($pixelcoords)#)) - duplicated_coordinates= duplicated(all_coordinates[,1:2])| duplicated(all_coordinates[,1:2], fromLast=TRUE) + ## find duplicated coordinates + all_coordinates = do.call(rbind, list(#echo ','.join($pixelcoords)#)) + duplicated_coordinates= duplicated(all_coordinates[,1:2])| duplicated(all_coordinates[,1:2], fromLast=TRUE) print(paste0("Number of removed duplicated coordinates: ", sum(duplicated_coordinates)/2)) - unique_coordinates = all_coordinates[!duplicated_coordinates,] + unique_coordinates = all_coordinates[!duplicated_coordinates,] - ## remove duplicated coordinates - datasetlist = list() - count = 1 - for (usable_dataset in list(#echo ','.join($msidata)#)){ - pixelsofinterest = pixels(usable_dataset)[names(pixels(usable_dataset)) %in% rownames(unique_coordinates)] - filtered_dataset = usable_dataset[,pixelsofinterest] - if (ncol(filtered_dataset) > 0 ){ - datasetlist[[count]] = filtered_dataset} - count = count +1} + ## remove duplicated coordinates + datasetlist = list() + count = 1 + for (usable_dataset in list(#echo ','.join($msidata)#)){ + pixelsofinterest = pixels(usable_dataset)[names(pixels(usable_dataset)) %in% rownames(unique_coordinates)] + filtered_dataset = usable_dataset[,pixelsofinterest] + if (ncol(filtered_dataset) > 0 ){ + datasetlist[[count]] = filtered_dataset} + count = count +1} + + msidata_combined = do.call(combine, datasetlist) - msidata_combined = do.call(combine, datasetlist) + ## save as (.RData) + + msidata = msidata_combined + save(msidata, file="$msidata_combined") - ## save as (.RData) + ## create x,y,sample_name dataframe for QC pdf + + position_df = cbind(coord(msidata), msidata\$combined_sample) + colnames(position_df)[3] = "sample_name" + +#end if + - msidata = msidata_combined - save(msidata, file="$msidata_combined") - - ## create x,y,sample_name dataframe for QC pdf +################################## outputs #################################### +################################################################################ - position_df = cbind(coord(msidata), msidata\$combined_sample) - colnames(position_df)[3] = "sample_name" +########### QC with pixels and their annotations ################################ - #else: - print("no combination, only testing xy shifts") +pdf("Combined_qc.pdf", width=15, height=15) - position_df = do.call(rbind, list(#echo ','.join($pixelcoords)#)) - position_df\$sample_name = as.factor(pixel_vector) - -print(paste0("Number of duplicated coordinates: ", sum(duplicated(position_df[,1:2])))) +## combined plot +combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+ + geom_tile() + + coord_fixed()+ + ggtitle("Spatial orientation of combined data")+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=5,byrow=TRUE)) +coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean) +coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name) +for(file_count in 1:nrow(coord_labels)) +{combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"], +y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))} +print(combine_plot) - #end if + + ## annotation plots + + ## plot 1 - ## create PDF to show all pixels in PDF as QC + column1_df = cbind(coord(msidata), msidata\$column1) + colnames(column1_df)[3] = "column1" - pdf("Combined_qc.pdf", width=15, height=15) - combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+ + if (sum(is.na(column1_df[3])) < nrow(column1_df)){ + column1_plot = ggplot(column1_df, aes(x=x, y=y, fill=column1))+ geom_tile() + coord_fixed()+ - ggtitle("Spatial orientation of combined data")+ + ggtitle(paste0(annotation_colnames[1]))+ theme_bw()+ theme(text=element_text(family="ArialMT", face="bold", size=15))+ theme(legend.position="bottom",legend.direction="vertical")+ - guides(fill=guide_legend(ncol=5,byrow=TRUE)) - coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean) - coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name) - for(file_count in 1:nrow(coord_labels)) - {combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"], - y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))} - print(combine_plot) + guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[1])) + print(column1_plot)} + ##rename columnname for output tabular file + colnames(column1_df)[3] = annotation_colnames[1] + + ## plot 2 + column2_df = cbind(coord(msidata), msidata\$column2) + colnames(column2_df)[3] = "column2" + + if (sum(is.na(column2_df[3])) < nrow(column2_df)){ + column2_plot = ggplot(column2_df, aes(x=x, y=y, fill=column2))+ + geom_tile() + + coord_fixed()+ + ggtitle(paste0(annotation_colnames[2]))+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[2])) + print(column2_plot)} + ##rename columnname for output tabular file + colnames(column2_df)[3] = annotation_colnames[2] + + ## plot 3 + column3_df = cbind(coord(msidata), msidata\$column3) + colnames(column3_df)[3] = "column3" + if (sum(is.na(column3_df[3])) < nrow(column3_df)){ + column3_plot = ggplot(column3_df, aes(x=x, y=y, fill=column3))+ + geom_tile() + + coord_fixed()+ + ggtitle(paste0(annotation_colnames[3]))+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[3])) + print(column3_plot)} + ##rename columnname for output tabular file + colnames(column3_df)[3] = annotation_colnames[3] + + ## plot 4 + column4_df = cbind(coord(msidata), msidata\$column4) + colnames(column4_df)[3] = "column4" + + if (sum(is.na(column4_df[3])) < nrow(column4_df)){ + column4_plot = ggplot(column4_df, aes(x=x, y=y, fill=column4))+ + geom_tile() + + coord_fixed()+ + ggtitle(paste0(annotation_colnames[4]))+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[4])) + print(column4_plot)} + ##rename columnname for output tabular file + colnames(column4_df)[3] = annotation_colnames[4] + + ## plot5 + + column5_df = cbind(coord(msidata), msidata\$column5) + colnames(column5_df)[3] = "column5" + if (sum(is.na(column5_df[3])) < nrow(column5_df)){ + column5_plot = ggplot(column5_df, aes(x=x, y=y, fill=column5))+ + geom_tile() + + coord_fixed()+ + ggtitle(paste0(annotation_colnames[5]))+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=5,byrow=TRUE, title=annotation_colnames[5])) + print(column5_plot)} + ##rename columnname for output tabular file + colnames(column5_df)[3] = annotation_colnames[5] + dev.off() -#end if - -####################### optional matrix output ################################# - -#if $output_matrix: +##################### annotation tabular output ################################ if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){ - spectramatrix = spectra(msidata)[] - spectramatrix = cbind(mz(msidata),spectramatrix) - newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix) - write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") + annotation_df_list = list(position_df, column1_df, column2_df, column3_df, column4_df, column5_df) + combined_annotations = Reduce(function(...) merge(..., by=c("x", "y"), all=TRUE), annotation_df_list) + write.table(combined_annotations, file="$annotation_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") }else{ - print("file has no features or pixels left") + print("No annotation tabular output because file has no features or pixels left") } -#end if + ####################### optional matrix output ################################# + + #if $output_matrix: + + if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){ + spectramatrix = spectra(msidata)[] + spectramatrix = cbind(mz(msidata),spectramatrix) + newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix) + write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") + }else{ + print("No intensity matrix output because file has no features or pixels left") + } + + #end if ]]></configfile> </configfiles> @@ -276,19 +401,21 @@ </param> </when> </conditional> + <param name="annotation_files" type="data" multiple="true" format="tabular" + label="Pixel annotations as tabular files" + help="Tabular files should have the same order as input files"/> <conditional name="combine_conditional"> <param name="combine_method" type="select" label="Select the way you want to combine multiple files" help="More detailed help can be found in the help section at the bottom"> <option value="automatic_combine" selected="True" >automatic combination</option> - <option value="xy_shifts">xy shifts by hand</option> + <option value="xy_shifts">shift xy coordinates with a tabular file</option> </param> <when value="automatic_combine"/> <when value="xy_shifts"> - <param name="coordinates_file" type="data" format="tabular" label="datasetnames, X and y values to shift data before combining" - help="Tabular file with three columns: 1 for the filename, 1 for the x-coordinate shift and 1 for the y-coordinate shift"/> + <param name="coordinates_file" type="data" format="tabular" label="datasetnames, x and y values to shift pixel coordinates before combining" + help="Tabular file with three columns: 1 for the filename, 1 for the x-coordinate shift and 1 for the y-coordinate shift. Pixels with the same coordinates after shifting will be deleted."/> <param name="column_x" data_ref="coordinates_file" label="Column with values for shift in x direction" type="data_column"/> <param name="column_y" data_ref="coordinates_file" label="Column with values for shift in y direction" type="data_column"/> <param name="column_names" data_ref="coordinates_file" label="Column with dataset names" type="data_column"/> - <param name="combination_true" type="boolean" display="radio" truevalue="yes_combi" falsevalue="no_combi" label="Combine datasets" help = "If there are duplicated pixels they will be deleted. If it is not clear if there are duplicated pixels, select No to get an idea about the pixel overlap"/> </when> </conditional> <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/> @@ -296,47 +423,42 @@ <outputs> <data format="rdata" name="msidata_combined" label="MSI_data_combined"/> <data format="pdf" name="combining_qc" from_work_dir="Combined_qc.pdf" label = "Combined_QC"/> + <data format="tabular" name="annotation_output" label="Annotation_tabular"/> <data format="tabular" name="matrixasoutput" label="Combined_matrix"> <filter>output_matrix</filter> </data> </outputs> <tests> - <test expect_num_outputs="3"> + <test expect_num_outputs="4"> <param name="infiles" value="msidata_1.RData,msidata_2.RData,msidata_3.RData" ftype="rdata"/> + <param name="annotation_files" value="annotations_file1.tabular,annotations_file2.tabular,annotations_file3.tabular" ftype="tabular"/> <param name="combine_method" value="xy_shifts"/> <param name="coordinates_file" ftype="tabular" value="xy_coordinates.tabular"/> <param name="column_x" value="1"/> <param name="column_y" value="2"/> <param name="column_names" value="3"/> - <param name="combination_true" value="yes_combi"/> <param name="output_matrix" value="True"/> <output name="matrixasoutput" file="123_combined_matrix.tabular"/> + <output name="annotation_output" file="123_annotation_output.tabular"/> <output name="msidata_combined" file="123_combined.RData" compare="sim_size" /> <output name="combining_qc" file="123_combined_QC.pdf" compare="sim_size" delta="20000"/> </test> - <test expect_num_outputs="2"> - <param name="infiles" value="msidata_1.RData,msidata_2.RData,msidata_3.RData" ftype="rdata"/> - <param name="combine_method" value="xy_shifts"/> - <param name="coordinates_file" ftype="tabular" value="xy_coordinates.tabular"/> - <param name="column_x" value="1"/> - <param name="column_y" value="2"/> - <param name="column_names" value="3"/> - <param name="combination_true" value="no_combi"/> - <output name="msidata_combined" file="123_no_combi.RData" compare="sim_size" /> - <output name="combining_qc" file="123_no_combi_QC.pdf" compare="sim_size" delta="20000"/> - </test> - <test expect_num_outputs="3"> + <test expect_num_outputs="4"> <param name="infiles" value="msidata_1.RData,msidata_2.RData" ftype="rdata"/> + <param name="annotation_files" value="annotations_file1.tabular,annotations_file2.tabular" ftype="tabular"/> <param name="combine_method" value="automatic_combine"/> <param name="output_matrix" value="True"/> <output name="matrixasoutput" file="12_combined_matrix.tabular"/> + <output name="annotation_output" file="12_annotation_output.tabular"/> <output name="msidata_combined" file="12_combined.RData" compare="sim_size" /> <output name="combining_qc" file="12_combined_QC.pdf" compare="sim_size" delta="20000"/> </test> - <test expect_num_outputs="2"> + <test expect_num_outputs="3"> <param name="infiles" value="msidata_1.RData,123_combined.RData" ftype="rdata"/> + <param name="annotation_files" value="annotations_file1.tabular,123_annotation.tabular" ftype="tabular"/> <param name="combine_method" value="automatic_combine"/> <param name="output_matrix" value="False"/> + <output name="annotation_output" file="112_annotation_output.tabular"/> <output name="msidata_combined" file="112_auto_combined.RData" compare="sim_size" /> <output name="combining_qc" file="112_auto_combined_QC.pdf" compare="sim_size" delta="20000"/> </test> @@ -354,20 +476,25 @@ - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) -Prerequisite: + +Input: -- m/z values need to be the same across all datasets (before using this tool), this can be achieved with the filtering tool (use same m/z range) and the preprocessing tool (use same binning parameter) +- MSI data files with same m/z values (to obtain same m/z values for different files: filtering tool same m/z range and preprocessing tool same binning width) +- Tabular files with pixel annotations need to have the x values in the first column, y values in the second column and then up to five annotations in the next columns. The order of the annotations in the columns must be the same for all files (x and y in column 1 and 2; annotation1 in column3, annotation2 in column4,...) +- The order and the number of MSI data files and annotation tabular files must be the same +- For xy shifts with tabular file: Tabular file with x and y coordinates shift and file name (see below) Options: -- "automatic combination": files are automatically arranged in a grid, subfiles are named according to input file name -- "xy shifts by hand": each file can be moved in x and y direction according to the users need (define one tabular file in the order in which the files are loaded in the history (bottom to top) and define for each file the x and y coordinates shifts in separate columns and the file name in a third column). To test if the pixels are correctly shifted use "combine datasets: No". -Combine datasets: Yes - Combines all datasets and removes all duplicated pixels (same x and y coordinates). +- "automatic combination": files are automatically arranged in a grid (duplicated pixels are allowed), subfiles are named according to the input file name +- "xy shifts by hand": each file can be moved in x and y direction according to the users need (define one tabular file in the order in which the files are loaded in the history (bottom to top) and define for each file the x and y coordinates shifts in separate columns and the file name in a third column). The xy shift option combines all datasets and removes all duplicated pixels (same x and y coordinates). + Output: -- imzML file containing multiple subfiles -- pdf that shows the pixel positions of the combined files +- single imzML file +- pdf that shows the pixel positions and annotations of the combined files +- Tabular file with pixel annotations (x,y,column with input file names, up to five annotation columns) - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns)