Mercurial > repos > galaxyp > msi_preprocessing
changeset 8:d77c5228fd1a draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 5bceedc3a11c950790692a4c64bbb83d46897bee
| author | galaxyp | 
|---|---|
| date | Tue, 24 Jul 2018 04:53:10 -0400 | 
| parents | 1a3d477bc54a | 
| children | 4d5578b57a77 | 
| files | msi_preprocessing.xml test-data/inputcalibrantfile1.tabular test-data/inputcalibrantfile2.tabular test-data/pixel_annotations.tabular test-data/preprocessing_median2.txt test-data/preprocessing_results1.RData test-data/preprocessing_results1.pdf test-data/preprocessing_results2.RData test-data/preprocessing_results2.pdf test-data/preprocessing_results3.RData test-data/preprocessing_results3.pdf test-data/preprocessing_results4.RData test-data/preprocessing_results4.pdf test-data/preprocessing_results5.RData test-data/preprocessing_results5.pdf test-data/preprocessing_sd2.txt | 
| diffstat | 16 files changed, 128 insertions(+), 22 deletions(-) [+] | 
line wrap: on
 line diff
--- a/msi_preprocessing.xml Fri Jul 06 14:13:48 2018 -0400 +++ b/msi_preprocessing.xml Tue Jul 24 04:53:10 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.4"> +<tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.5"> <description> mass spectrometry imaging preprocessing </description> @@ -6,6 +6,7 @@ <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> <requirement type="package" version="2.2.1">r-gridextra</requirement> <requirement type="package" version="0.20-35">r-lattice</requirement> + <requirement type="package" version="2.2.1">r-ggplot2</requirement> </requirements> <command detect_errors="exit_code"> <![CDATA[ @@ -33,6 +34,7 @@ library(Cardinal) library(gridExtra) library(lattice) +library(ggplot2) #if $infile.ext == 'imzml' #if str($processed_cond.processed_file) == "processed": @@ -319,20 +321,39 @@ ## optional summarized matrix print('Summarized matrix') + ## optional annotation from tabular file to obtain groups over which to apply mean, median or sd (otherwise all pixels are considered to be sample) + + #if str($tabular_annotation.load_annotation) == 'yes_annotation': + + ## read and extract x,y,annotation information + input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) + annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] + colnames(annotation_input) = c("x", "y", "annotation") + + ## merge with coordinate information of msidata + msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) + colnames(msidata_coordinates)[3] = "pixel_index" + merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE) + merged_annotation[is.na(merged_annotation)] = "NA" + merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] + msidata\$annotation = as.factor(merged_annotation[,4]) + + #end if + #if "mean" in str($summary_type).split(","): print("mean matrix") - if (!is.null(levels(msidata\$combined_sample))){ + if (!is.null(levels(msidata\$annotation))){ sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) count = 1 - for (subsample in levels(msidata\$combined_sample)){ - subsample_pixels = msidata[,msidata\$combined_sample == subsample] + for (subsample in levels(msidata\$annotation)){ + subsample_pixels = msidata[,msidata\$annotation == subsample] subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE) sample_matrix = cbind(sample_matrix, subsample_calc) count = count+1 } rownames(sample_matrix) = mz(msidata) - colnames(sample_matrix) = levels(msidata\$combined_sample) + colnames(sample_matrix) = levels(msidata\$annotation) write.table(sample_matrix, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") }else{ full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE)) @@ -345,18 +366,21 @@ #if "median" in str($summary_type).split(","): print("median matrix") - if (!is.null(levels(msidata\$combined_sample))){ + if (!is.null(levels(msidata\$annotation))){ sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) count = 1 - for (subsample in levels(msidata\$combined_sample)){ - subsample_pixels = msidata[,msidata\$combined_sample == subsample] + for (subsample in levels(msidata\$annotation)){ + + subsample_pixels = msidata[,msidata\$annotation == subsample] ## no idea why it does not work??? NA problem?! + subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE) + sample_matrix = cbind(sample_matrix, subsample_calc) count = count+1 } rownames(sample_matrix) = mz(msidata) - colnames(sample_matrix) = levels(msidata\$combined_sample) + colnames(sample_matrix) = levels(msidata\$annotation) write.table(sample_matrix, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") }else{ full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,median, na.rm=TRUE)) @@ -368,18 +392,18 @@ #if "sd" in str($summary_type).split(","): print("sd matrix") - if (!is.null(levels(msidata\$combined_sample))){ + if (!is.null(levels(msidata\$annotation))){ sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) count = 1 - for (subsample in levels(msidata\$combined_sample)){ - subsample_pixels = msidata[,msidata\$combined_sample == subsample] + for (subsample in levels(msidata\$annotation)){ + subsample_pixels = msidata[,msidata\$annotation == subsample] subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE) sample_matrix = cbind(sample_matrix, subsample_calc) count = count+1 } rownames(sample_matrix) = mz(msidata) - colnames(sample_matrix) = levels(msidata\$combined_sample) + colnames(sample_matrix) = levels(msidata\$annotation) write.table(sample_matrix, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") }else{ @@ -414,6 +438,45 @@ title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name")) rownames(QC_numbers) = c("# features", "median # peaks", "median intensity", "median TIC") grid.table(t(QC_numbers)) + + #if str($tabular_annotation.load_annotation) == 'yes_annotation': + + ## the more annotation groups a file has the smaller will be the legend + number_combined = length(levels(msidata\$annotation)) + if (number_combined<20){ + legend_size = 10 + }else if (number_combined>20 && number_combined<40){ + legend_size = 9 + }else if (number_combined>40 && number_combined<60){ + legend_size = 8 + }else if (number_combined>60 && number_combined<100){ + legend_size = 7 + }else{ + legend_size = 6 + } + + position_df = cbind(coord(msidata)[,1:2], msidata\$annotation) + colnames(position_df)[3] = "sample_name" + + combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+ + geom_tile() + + coord_fixed()+ + ggtitle("Spatial orientation of annotated data")+ + theme_bw()+ + theme(plot.title = element_text(hjust = 0.5))+ + theme(text=element_text(family="ArialMT", face="bold", size=12))+ + theme(legend.position="bottom",legend.direction="vertical")+ + theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+ + guides(fill=guide_legend(ncol=5,byrow=TRUE)) + coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean) + coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name) + for(file_count in 1:nrow(coord_labels)) + {combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"], + y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))} + + print(combine_plot) + #end if + dev.off() }else{ @@ -613,6 +676,21 @@ <option value="median">median</option> <option value="sd">standard deviation</option> </param> + <conditional name="tabular_annotation"> + <param name="load_annotation" type="select" label="Use pixel annotations from tabular file to summarize pixel"> + <option value="no_annotation" selected="True">summarize over all pixels</option> + <option value="yes_annotation">summarize over categories from annotation file</option> + </param> + <when value="yes_annotation"> + <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file to summarize pixel" + help="Tabular file with three columns: x values, y values and pixel annotations"/> + <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/> + <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/> + <param name="column_names" data_ref="annotation_file" label="Column with pixel annotations" type="data_column"/> + <param name="tabular_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/> + </when> + <when value="no_annotation"/> + </conditional> <param name="output_matrix" type="boolean" label="Intensity matrix output"/> </inputs> <outputs> @@ -682,6 +760,9 @@ </conditional> </conditional> </repeat> + <conditional name="tabular_annotation"> + <param name="load_annotation" value="no_annotation"/> + </conditional> <param name="output_matrix" value="True"/> <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/> <output name="matrixasoutput" file="preprocessing_results1.txt"/> @@ -708,6 +789,14 @@ </conditional> </conditional> </repeat> + <conditional name="tabular_annotation"> + <param name="load_annotation" value="yes_annotation"/> + <param name="annotation_file" value="pixel_annotations.tabular"/> + <param name="column_x" value="1"/> + <param name="column_y" value="2"/> + <param name="column_names" value="3"/> + <param name="tabular_header" value="FALSE"/> + </conditional> <param name="summary_type" value="median,sd"/> <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/> <output name="summarized_output_median" file="preprocessing_median2.txt" lines_diff="2"/> @@ -742,6 +831,9 @@ </conditional> </repeat> <param name="summary_type" value="mean"/> + <conditional name="tabular_annotation"> + <param name="load_annotation" value="no_annotation"/> + </conditional> <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/> <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/> <output name="summarized_output_mean" file="preprocessing_mean3.txt" lines_diff="2"/> @@ -763,6 +855,9 @@ <param name="bin_width" value="0.1"/> </conditional> </repeat> + <conditional name="tabular_annotation"> + <param name="load_annotation" value="no_annotation"/> + </conditional> <param name="output_matrix" value="True"/> <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/> <output name="matrixasoutput" file="preprocessing_results4.txt"/> @@ -782,6 +877,9 @@ </conditional> </conditional> </repeat> + <conditional name="tabular_annotation"> + <param name="load_annotation" value="no_annotation"/> + </conditional> <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size"/> <output name="QC_overview" file="preprocessing_results5.pdf" compare="sim_size"/> </test> @@ -798,6 +896,7 @@ - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) +- optional: tabular file with pixel annotations: x and y values in separate columns and the corresponding annotation in a third column Options: @@ -814,8 +913,9 @@ Output: - imzML file, preprocessed -- pdf with key values after each processing step +- pdf with key values after each processing step, in case of loaded annotations file overview plot of pixel annotations - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns) +- optional: summarized intensity matrix: mean, median or standard deviation for each m/z feature; in case pixel annotations are provided the intensity values are summarized for each pixel group Tip:
--- a/test-data/inputcalibrantfile1.tabular Fri Jul 06 14:13:48 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -101.5 -356.7 -555.1
--- a/test-data/inputcalibrantfile2.tabular Fri Jul 06 14:13:48 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -869.51 mass1 -1001.62 mass2 -1023.6 mass3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pixel_annotations.tabular Tue Jul 24 04:53:10 2018 -0400 @@ -0,0 +1,12 @@ +1 1 File1 +1 2 File1 +1 3 File1 +3 1 File2 +3 2 File2 +3 3 File2 +4 1 File2 +4 2 File2 +4 3 File2 +8 1 File3 +8 2 File3 +8 3 File3
--- a/test-data/preprocessing_median2.txt Fri Jul 06 14:13:48 2018 -0400 +++ b/test-data/preprocessing_median2.txt Tue Jul 24 04:53:10 2018 -0400 @@ -1,4 +1,4 @@ - 1_File1 2_File2 3_File3 + File1 File2 NA 101.083335876465 0 0.133186891674995 0.266373783349991 101.666664123535 0 0 0 102.166664123535 0 0 0
--- a/test-data/preprocessing_sd2.txt Fri Jul 06 14:13:48 2018 -0400 +++ b/test-data/preprocessing_sd2.txt Tue Jul 24 04:53:10 2018 -0400 @@ -1,4 +1,4 @@ - 1_File1 2_File2 3_File3 + File1 File2 NA 101.083335876465 0.180910895583245 0.284914371691127 0.358878736172051 101.666664123535 0 0 0 102.166664123535 0 0 0
