Mercurial > repos > galaxyp > cardinal_data_exporter
diff data_exporter.xml @ 7:350a84ea795c draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit f986c51abe33c7f622d429a3c4a79ee24b33c1f3"
author | galaxyp |
---|---|
date | Thu, 23 Apr 2020 08:06:24 -0400 |
parents | e521b5767819 |
children | a5d09f2daf71 |
line wrap: on
line diff
--- a/data_exporter.xml Wed Mar 25 08:07:01 2020 -0400 +++ b/data_exporter.xml Thu Apr 23 08:06:24 2020 -0400 @@ -1,4 +1,4 @@ -<tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.2"> +<tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.0"> <description> exports imzML and Analyze7.5 to tabular files </description> @@ -22,22 +22,27 @@ library(Cardinal) -@READING_MSIDATA_INRAM@ +@READING_MSIDATA@ +## in case RData input is MSImageSet: +if (class(msidata) == "MSImageSet"){ + msidata = as(msidata, "MSImagingExperiment") + run(msidata) = "infile" + } + ## extract spectra matrix once: + msidata_matrix = as.matrix(iData(msidata)) ###################### Intensity matrix output ################################ #if "int_matrix" in str($output_options).split(","): print("intensity matrix output") - mz_names = gsub(" = ", "_", names(features(msidata))) - mz_names = gsub("/", "", mz_names) - pixel_names = gsub(", y = ", "_", names(pixels(msidata))) - pixel_names = gsub(" = ", "y_", pixel_names) + mz_names = paste0("mz_", mz(msidata)) + pixel_names = paste0("xy_", coord(msidata)\$x, "_", coord(msidata)\$y) - ##spectramatrix = cbind(mz_names,spectra(msidata)) - newmatrix = rbind(c("mz_name", pixel_names), cbind(mz_names,spectra(msidata))) - write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") + write.table( + rbind(c("mz_name", pixel_names), cbind(mz_names,msidata_matrix)), ##create matrix + file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") ## free up RAM space in case furhter steps will be run: rm(newmatrix) gc() @@ -49,21 +54,18 @@ #if "mz_tabular" in str($output_options).split(","): print("mz feature output") - mz_names = gsub(" = ", "_", names(features(msidata))) - mz_names = gsub("/", "", mz_names) + full_sample_mean = apply(msidata_matrix,1,mean, na.rm=TRUE) + full_sample_sd = apply(msidata_matrix,1,sd, na.rm=TRUE) - ## mean, median, sd and SEM intensity per file and mz - full_sample_mean = rowMeans(spectra(msidata), na.rm=TRUE) - full_sample_median = apply(spectra(msidata),1,median, na.rm=TRUE) - full_sample_sd = apply(spectra(msidata),1,sd, na.rm=TRUE) - full_sample_sem = full_sample_sd/full_sample_mean*100 - ## npeaks and sum of all intensities per spectrum and mz - npeaks= sum(spectra(msidata)>0, na.rm=TRUE) - mzTIC = rowSums(spectra(msidata), na.rm=TRUE) ## calculate intensity sum for each m/z - peakspermz = rowSums(spectra(msidata) > 0, na.rm=TRUE) ## calculate number of intensities > 0 for each m/z (max = number of spectra) + mz_df = data.frame(paste0("mz_", mz(msidata)), ##mz names + mz(msidata), ##mz values + full_sample_mean, ##mean + apply(msidata_matrix,1,median, na.rm=TRUE), ##median + full_sample_sd, ##sd + full_sample_sd/full_sample_mean*100, ##SEM + rowSums(msidata_matrix, na.rm=TRUE), ## calculate intensity sum for each m/z + rowSums(msidata_matrix > 0, na.rm=TRUE)) ## calculate number of intensities > 0 for each m/z (max = number of spectra) - ## combine into dataframe, order is the same for all vectors - mz_df = data.frame(mz_names, mz(msidata), full_sample_mean, full_sample_median, full_sample_sd, full_sample_sem, mzTIC, peakspermz) colnames(mz_df) = c("mz_names", "mz", "sample_mean", "sample_median", "sample_sd", "sample_sem", "intensity_sum", "number_peaks") write.table(mz_df, file="$feature_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") ## free up RAM space in case furhter steps will be run: @@ -82,16 +84,15 @@ colnames(annotation_input) = c("x", "y", "annotation") ## merge with coordinate information of msidata - msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) - colnames(msidata_coordinates)[3] = "pixel_index" + msidata_coordinates = data.frame(coord(msidata)\$x, coord(msidata)\$y, c(1:ncol(msidata))) + colnames(msidata_coordinates) = c("x", "y", "pixel_index") merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE) merged_annotation[is.na(merged_annotation)] = "NA" merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] msidata\$annotation = as.factor(merged_annotation[,4]) ## create m/z feature name - mz_names = gsub(" = ", "_", names(features(msidata))) - mz_names = gsub("/", "", mz_names) + mz_names = paste0("mz_", mz(msidata)) #if "mean" in str($tabular_annotation.summary_type).split(","): print("summarized mean") @@ -131,7 +132,7 @@ count = 1 for (subsample in levels(msidata\$annotation)){ subsample_pixels = msidata[,msidata\$annotation == subsample] - subsample_calc = apply(spectra(subsample_pixels),1,sd, na.rm=TRUE) + subsample_calc = apply(as.matrix(spectra(subsample_pixels)),1,sd, na.rm=TRUE) sample_matrix = cbind(sample_matrix, subsample_calc) count = count+1} sample_matrix_sd = cbind(mz_names,sample_matrix) @@ -147,40 +148,32 @@ print("pixel output") ## coordinates - xycoordinates = coord(msidata)[,1:2] + xycoordinates = data.frame(coord(msidata)\$x, coord(msidata)\$y) + colnames(xycoordinates) = c("x", "y") ## pixel name - pixel_names = paste0("xy_", xycoordinates\$x, "_", xycoordinates\$y) - - ## pixel order - pixelxyarray=1:length(pixels(msidata)) - - ## number of pixels per spectrum: every intensity value > 0 counts as peak - peaksperpixel = colSums(spectra(msidata)>0, na.rm=TRUE) - - ## Total ion chromatogram per spectrum - TICs = round(colSums(spectra(msidata), na.rm=TRUE), digits = 2) - - ## Median ion intensity per spectrum - med_int = round(apply(spectra(msidata), 2, median, na.rm=TRUE), digits = 2) - - ## Maximum ion intensity per spectrum - max_int = round(apply(spectra(msidata), 2, max, na.rm=TRUE), digits = 2) - - ## Highest m/z per spectrum - highestmz = apply(spectra(msidata),2,which.max) - highestmz_data = mz(msidata)[highestmz] + pixel_names = paste0("xy_", coord(msidata)\$x, "_", coord(msidata)\$y) ## Combine into dataframe; order is the same for all vectors - spectra_df = data.frame(pixel_names, xycoordinates, pixelxyarray, peaksperpixel, med_int, TICs, max_int, highestmz_data) + spectra_df = data.frame(pixel_names, + xycoordinates, + 1:length(pixels(msidata)), ##pixel order + colSums(msidata_matrix>0, na.rm=TRUE), ##peaks per pixel + round(apply(msidata_matrix, 2, median, na.rm=TRUE), digits = 2), ## median intensity + round(colSums(msidata_matrix, na.rm=TRUE), digits = 2), ##TICs + round(apply(msidata_matrix, 2, max, na.rm=TRUE), digits = 2), ##max intensity + mz(msidata)[apply(msidata_matrix,2,which.max) ]) ##highest mz + colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz") #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": calibrant_list = read.delim("$counting_calibrants.mz_tabular", header = $counting_calibrants.feature_header, na.strings=c("","NA"), stringsAsFactors = FALSE) calibrant_list = calibrant_list[,$counting_calibrants.feature_column, drop=FALSE] + ### calculate how many input calibrant m/z are valid: inputcalibrants = calibrant_list[calibrant_list[,1]>min(mz(msidata)) & calibrant_list[,1]<max(mz(msidata)),,drop = FALSE] + inputcalibrantmasses = inputcalibrants[,1] ##QC plot number 2) Number of calibrants per spectrum @@ -214,11 +207,11 @@ }else{countvector = rep(0,ncol(msidata))} countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts - colnames(countdf) = c("x_values", "y_values", "m/z count") + colnames(countdf) = c("x_values", "y_values", "mz_count") spectra_df = merge(spectra_df, countdf, by=c("x_values", "y_values")) ## sort columns to have spectra_names as rowname in first column - spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "m/z count")] + spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "mz_count")] #end if #if str($tabular_annotation.load_annotation) == 'yes_annotation': @@ -228,7 +221,7 @@ ## sort columns to have spectra_names as rowname in first column #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": - spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "m/z count", "annotation")] + spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "mz_count", "annotation")] #else spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "annotation")] #end if @@ -335,6 +328,24 @@ <output name="feature_output" file="features_out3.tabular"/> <output name="pixel_output" file="pixel_out3.tabular"/> </test> + <test expect_num_outputs="2"> + <expand macro="processed_infile_imzml"/> + <conditional name="processed_cond"> + <param name="processed_file" value="processed"/> + <param name="accuracy" value="100"/> + <param name="units" value="ppm"/> + </conditional> + <param name="output_options" value="pixel_tabular,mz_tabular"/> + <conditional name="counting_calibrants"> + <param name="pixel_with_calibrants" value="yes_calibrants"/> + <param name="mz_tabular" ftype="tabular" value = "featuresofinterest5.tabular"/> + <param name="feature_column" value="1"/> + <param name="feature_header" value="False"/> + <param name="plusminus_ppm" value="200"/> + </conditional> + <output name="feature_output" file="features_out4.tabular"/> + <output name="pixel_output" file="pixel_out4.tabular"/> + </test> </tests> <help> <![CDATA[