Mercurial > repos > galaxyp > cardinal_data_exporter
view data_exporter.xml @ 17:56da27be956a draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 91e77c139cb3b7c6d67727dc39140dd79355fa0c
author | galaxyp |
---|---|
date | Thu, 04 Jul 2024 13:34:08 +0000 |
parents | b6a4dd06cde0 |
children |
line wrap: on
line source
<tool id="cardinal_data_exporter" name="MSI data exporter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
    <!--
        Galaxy wrapper that exports a mass spectrometry imaging dataset to tabular files.
        The R script in the configfile below is rendered through Cheetah and can write:
          * the full intensity matrix (m/z in rows, spectra in columns),
          * per-m/z summary statistics,
          * per-spectrum summary statistics (optionally with a count of user supplied
            m/z values found in each spectrum and with the pixel annotation),
          * per-annotation-group mean / median / sd for every m/z.
        Tokens of the form @NAME@ and the expanded macros are defined in macros.xml.
    -->
    <description>
        exports imzML and Analyze7.5 to tabular files
    </description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code">
    <![CDATA[

        @INPUT_LINKING@
        cat '${cardinal_imzml_exporter}' &&
        Rscript '${cardinal_imzml_exporter}'

    ]]>
    </command>
    <configfiles>
        <configfile name="cardinal_imzml_exporter"><![CDATA[

################################# load libraries and read file #################

library(Cardinal)

@READING_MSIDATA@

## in case RData input is MSImageSet:
if (class(msidata) == "MSImageSet"){
    msidata = as(msidata, "MSImagingExperiment")
    run(msidata) = "infile"
}

## extract spectra matrix once; it is reused by the intensity matrix, m/z feature and pixel outputs
msidata_matrix = as.matrix(iData(msidata))

###################### Intensity matrix output ################################

#if "int_matrix" in str($output_options).split(","):
print("intensity matrix output")

mz_names = paste0("mz_", mz(msidata))
pixel_names = paste0("xy_", coord(msidata)\$x, "_", coord(msidata)\$y)

## first row holds the pixel names, first column holds the m/z names
write.table(
    rbind(c("mz_name", pixel_names), cbind(mz_names, msidata_matrix)), ##create matrix
    file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")

## free up RAM space in case further steps will be run:
## the combined matrix above is never bound to a name, so there is nothing to rm() here
gc()

#end if

############################## m/z feature output ##########################

#if "mz_tabular" in str($output_options).split(","):
print("mz feature output")

full_sample_mean = apply(msidata_matrix, 1, mean, na.rm=TRUE)
full_sample_sd = apply(msidata_matrix, 1, sd, na.rm=TRUE)

mz_df = data.frame(paste0("mz_", mz(msidata)), ##mz names
    mz(msidata), ##mz values
    full_sample_mean, ##mean
    apply(msidata_matrix, 1, median, na.rm=TRUE), ##median
    full_sample_sd, ##sd
    full_sample_sd/full_sample_mean*100, ## sd relative to the mean in percent; written to the sample_sem column
    rowSums(msidata_matrix, na.rm=TRUE), ## calculate intensity sum for each m/z
    rowSums(msidata_matrix > 0, na.rm=TRUE)) ## calculate number of intensities > 0 for each m/z (max = number of spectra)

## NOTE(review): the sample_sem column name is kept for backward compatibility although
## the value is sd/mean*100 and not a standard error of the mean
colnames(mz_df) = c("mz_names", "mz", "sample_mean", "sample_median", "sample_sd", "sample_sem", "intensity_sum", "number_peaks")
write.table(mz_df, file="$feature_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")

## free up RAM space in case further steps will be run:
rm(mz_df)
gc()

#end if

###################### summarized m/z feature output #######################

#if str($tabular_annotation.load_annotation) == 'yes_annotation':
print("summarized annotation output")

## read and extract x,y,annotation information
input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
colnames(annotation_input) = c("x", "y", "annotation")

## merge with coordinate information of msidata
msidata_coordinates = data.frame(coord(msidata)\$x, coord(msidata)\$y, c(1:ncol(msidata)))
colnames(msidata_coordinates) = c("x", "y", "pixel_index")
merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE)
## pixels without annotation get the literal group name "NA"
merged_annotation[is.na(merged_annotation)] = "NA"
## merge() reorders rows, restore the pixel order of msidata
merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
msidata\$annotation = as.factor(merged_annotation[,4])

## create m/z feature name
mz_names = paste0("mz_", mz(msidata))

#if "mean" in str($tabular_annotation.summary_type).split(","):
print("summarized mean")

## calculate mean per annotation group; one column is appended per group
sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
for (subsample in levels(msidata\$annotation)){
    subsample_pixels = msidata[,msidata\$annotation == subsample]
    subsample_calc = rowMeans(as.matrix(spectra(subsample_pixels)), na.rm=TRUE)
    sample_matrix = cbind(sample_matrix, subsample_calc)
}
sample_matrix_mean = cbind(mz_names, sample_matrix)
sample_matrix_mean = rbind(c("mz_name", levels(msidata\$annotation)), sample_matrix_mean)
write.table(sample_matrix_mean, file="$summarized_mean", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")

#end if

#if "median" in str($tabular_annotation.summary_type).split(","):
print("summarized median")

## calculate median per annotation group; one column is appended per group
sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
for (subsample in levels(msidata\$annotation)){
    subsample_pixels = msidata[,msidata\$annotation == subsample]
    subsample_calc = apply(as.matrix(spectra(subsample_pixels)), 1, median, na.rm=TRUE)
    sample_matrix = cbind(sample_matrix, subsample_calc)
}
sample_matrix_median = cbind(mz_names, sample_matrix)
## NOTE(review): header cell is "mz name" here but "mz_name" in the mean table; left unchanged, confirm before unifying
sample_matrix_median = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_median)
write.table(sample_matrix_median, file="$summarized_median", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")

#end if

#if "sd" in str($tabular_annotation.summary_type).split(","):
print("summarized sd")

## calculate sd per annotation group; one column is appended per group
sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
for (subsample in levels(msidata\$annotation)){
    subsample_pixels = msidata[,msidata\$annotation == subsample]
    subsample_calc = apply(as.matrix(spectra(subsample_pixels)), 1, sd, na.rm=TRUE)
    sample_matrix = cbind(sample_matrix, subsample_calc)
}
sample_matrix_sd = cbind(mz_names, sample_matrix)
## NOTE(review): header cell is "mz name" here but "mz_name" in the mean table; left unchanged, confirm before unifying
sample_matrix_sd = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_sd)
write.table(sample_matrix_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")

#end if

#end if

############################ spectra (pixel) output ############################

#if "pixel_tabular" in str($output_options).split(","):
print("pixel output")

## coordinates
xycoordinates = data.frame(coord(msidata)\$x, coord(msidata)\$y)
colnames(xycoordinates) = c("x", "y")

## pixel name
pixel_names = paste0("xy_", coord(msidata)\$x, "_", coord(msidata)\$y)

## Combine into dataframe; order is the same for all vectors
spectra_df = data.frame(pixel_names, xycoordinates,
    1:length(pixels(msidata)), ##pixel order
    colSums(msidata_matrix>0, na.rm=TRUE), ##peaks per pixel
    round(apply(msidata_matrix, 2, median, na.rm=TRUE), digits = 2), ## median intensity
    round(colSums(msidata_matrix, na.rm=TRUE), digits = 2), ##TICs
    round(apply(msidata_matrix, 2, max, na.rm=TRUE), digits = 2), ##max intensity
    mz(msidata)[apply(msidata_matrix, 2, which.max)]) ##highest mz
colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz")

#if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants":

calibrant_list = read.delim("$counting_calibrants.mz_tabular", header = $counting_calibrants.feature_header, na.strings=c("","NA"), stringsAsFactors = FALSE)
calibrant_list = calibrant_list[,$counting_calibrants.feature_column, drop=FALSE]

### calculate how many input calibrant m/z are valid:
## which() drops NA entries (empty or "NA" cells are read as NA); indexing with the raw
## logical vector would turn each NA into an all-NA row and break the window filter below
valid_calibrants = which(calibrant_list[,1]>min(mz(msidata)) & calibrant_list[,1]<max(mz(msidata)))
inputcalibrants = calibrant_list[valid_calibrants,,drop = FALSE]
inputcalibrantmasses = inputcalibrants[,1]

## Number of calibrants per spectrum
## matrix with one row per calibrant and one column per pixel: TRUE if there is peak intensity in range
pixelmatrix = matrix(ncol=ncol(msidata), nrow = 0)

if (length(inputcalibrantmasses) != 0){

    ## calculate plusminus values in m/z for each calibrant
    plusminusvalues = rep($counting_calibrants.plusminus_ppm/1000000, length(inputcalibrantmasses))*inputcalibrantmasses

    ## filter for m/z window of each calibrant and calculate if sum of peak intensities > 0
    for (mass in seq_along(inputcalibrantmasses)){

        filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]

        if (nrow(filtered_data) > 1 && sum(as.matrix(spectra(filtered_data)), na.rm=TRUE) > 0){
            ## several m/z in the window: a pixel counts if any of them has intensity > 0
            intensity_sum = colSums(as.matrix(spectra(filtered_data)), na.rm=TRUE) > 0
        }else if (nrow(filtered_data) == 1 && sum(as.matrix(spectra(filtered_data)), na.rm=TRUE) > 0){
            ## exactly one m/z in the window: use its intensities directly
            intensity_sum = as.matrix(spectra(filtered_data)) > 0
        }else{
            ## no m/z in the window or all intensities are 0
            intensity_sum = rep(FALSE, ncol(filtered_data))
        }

        ## for each pixel add sum of intensities > 0 in the given m/z range
        pixelmatrix = rbind(pixelmatrix, intensity_sum)
    }

    ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE)
    countvector = as.factor(apply(pixelmatrix, 2, sum, na.rm=TRUE))

}else{
    countvector = rep(0, ncol(msidata))
}

countdf = cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts
colnames(countdf) = c("x_values", "y_values", "mz_count")
spectra_df = merge(spectra_df, countdf, by=c("x_values", "y_values"))

## sort columns to have spectra_names as rowname in first column
spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "mz_count")]

#end if

#if str($tabular_annotation.load_annotation) == 'yes_annotation':

colnames(annotation_input) = c("x_values", "y_values", "annotation")
spectra_df = merge(spectra_df, annotation_input, by=c("x_values", "y_values"), all.x=TRUE)

## sort columns to have spectra_names as rowname in first column
#if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants":
spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "mz_count", "annotation")]
#else
spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "annotation")]
#end if

#end if

## sort rows according to original pixel order
spectra_df = spectra_df[match(pixel_names, spectra_df\$spectra_names),]

## Create list and output tabular
write.table(spectra_df, file="$pixel_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")

#end if

    ]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="reading_msidata"/>
        <param name="output_options" type="select" optional="False" multiple="true" label="Multiple output files can be selected">
            <option value="int_matrix" selected="True">intensity matrix</option>
            <option value="mz_tabular">mz feature output</option>
            <option value="pixel_tabular">pixel output</option>
        </param>
        <conditional name="counting_calibrants">
            <param name="pixel_with_calibrants" type="select" label="Use file with m/z of interest to calculate their occurrence in each spectrum">
                <option value="no_calibrants" selected="True">no</option>
                <option value="yes_calibrants">yes</option>
            </param>
            <when value="no_calibrants"/>
            <when value="yes_calibrants">
                <expand macro="reading_1_column_mz_tabular" label="For each spectrum the occurrence of the provided m/z values is counted"/>
                <param name="plusminus_ppm" value="200" type="float" label="ppm range will be added in both directions to input m/z" help="The m/z window is used to search for peaks, if intensity > 0 found in the window the m/z is considered present, if all intensities are 0 the m/z is considered not present"/>
            </when>
        </conditional>
        <conditional name="tabular_annotation">
            <param name="load_annotation" type="select" label="Pixel annotation can be used to summarize intensities per annotation group">
                <option value="no_annotation" selected="True">no</option>
                <option value="yes_annotation">yes</option>
            </param>
            <when value="no_annotation"/>
            <when value="yes_annotation">
                <expand macro="reading_pixel_annotations"/>
                <param name="summary_type" type="select" optional="False" multiple="true" label="Calculation for each m/z and all pixels of a annotation group" help="This step will only work if pixel annotations are provided">
                    <option value="mean">mean</option>
                    <option value="median">median</option>
                    <option value="sd">standard deviation</option>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- each output is only created when the matching option was selected -->
        <data format="tabular" name="intensity_matrix" label="${tool.name} on ${on_string}: intensity_matrix">
            <filter>"int_matrix" in output_options</filter>
        </data>
        <data format="tabular" name="pixel_output" label="${tool.name} on ${on_string}: spectra">
            <filter>"pixel_tabular" in output_options</filter>
        </data>
        <data format="tabular" name="feature_output" label="${tool.name} on ${on_string}: features">
            <filter>"mz_tabular" in output_options</filter>
        </data>
        <data format="tabular" name="summarized_mean" label="${tool.name} on ${on_string}: group_mean">
            <filter>tabular_annotation['load_annotation'] == 'yes_annotation' and 'mean' in tabular_annotation['summary_type']</filter>
        </data>
        <data format="tabular" name="summarized_median" label="${tool.name} on ${on_string}: group_median">
            <filter>tabular_annotation['load_annotation'] == 'yes_annotation' and 'median' in tabular_annotation['summary_type']</filter>
        </data>
        <data format="tabular" name="summarized_sd" label="${tool.name} on ${on_string}: group_sd">
            <filter>tabular_annotation['load_annotation'] == 'yes_annotation' and 'sd' in tabular_annotation['summary_type']</filter>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="2">
            <expand macro="infile_imzml"/>
            <param name="output_options" value="int_matrix,mz_tabular"/>
            <output name="intensity_matrix" file="int_matrix1.tabular"/>
            <output name="feature_output" file="features_out1.tabular"/>
        </test>
        <test expect_num_outputs="3">
            <expand macro="infile_analyze75"/>
            <param name="output_options" value="pixel_tabular"/>
            <conditional name="tabular_annotation">
                <param name="load_annotation" value="yes_annotation"/>
                <param name="annotation_file" value="annotations.tabular"/>
                <param name="column_x" value="1"/>
                <param name="column_y" value="2"/>
                <param name="column_names" value="4"/>
                <param name="tabular_header" value="True"/>
                <param name="summary_type" value="mean,sd"/>
            </conditional>
            <output name="pixel_output" file="pixel_out2.tabular"/>
            <output name="summarized_mean" file="mean_out2.tabular"/>
            <output name="summarized_sd" file="sd_out2.tabular"/>
        </test>
        <test expect_num_outputs="3">
            <expand macro="infile_imzml"/>
            <param name="output_options" value="int_matrix,pixel_tabular,mz_tabular"/>
            <conditional name="counting_calibrants">
                <param name="pixel_with_calibrants" value="yes_calibrants"/>
                <param name="mz_tabular" value="inputcalibrantfile2.txt"/>
                <param name="feature_column" value="1"/>
                <param name="feature_header" value="False"/>
                <param name="plusminus_ppm" value="200"/>
            </conditional>
            <output name="intensity_matrix" file="int_matrix3.tabular"/>
            <output name="feature_output" file="features_out3.tabular"/>
            <output name="pixel_output" file="pixel_out3.tabular"/>
        </test>
        <test expect_num_outputs="2">
            <expand macro="processed_infile_imzml"/>
            <conditional name="processed_cond">
                <param name="processed_file" value="processed"/>
                <param name="accuracy" value="200"/>
                <param name="units" value="ppm"/>
            </conditional>
            <param name="output_options" value="pixel_tabular,mz_tabular"/>
            <conditional name="counting_calibrants">
                <param name="pixel_with_calibrants" value="yes_calibrants"/>
                <param name="mz_tabular" ftype="tabular" value="featuresofinterest5.tabular"/>
                <param name="feature_column" value="1"/>
                <param name="feature_header" value="False"/>
                <param name="plusminus_ppm" value="200"/>
            </conditional>
            <output name="feature_output">
                <assert_contents>
                    <has_text text="100.120072029209"/>
                </assert_contents>
            </output>
            <output name="pixel_output" file="pixel_out4.tabular"/>
        </test>
    </tests>
    <help>
        <![CDATA[

@CARDINAL_DESCRIPTION@

-----

This tool provides multiple tabular output options for mass spectrometry imaging data files.

@MSIDATA_INPUT_DESCRIPTION@
@SPECTRA_TABULAR_INPUT_DESCRIPTION@
@MZ_TABULAR_INPUT_DESCRIPTION@

**Output options**

- intensity matrix: m/z in rows, spectra in columns, filled with intensity values
- spectra output: spectra in rows - for each spectrum: name, x and y coordinates, order, number of peaks (intensities > 0), total ion current (TIC), median intensity, maximum intensity, highest m/z feature per spectrum, optional count of m/z per spectrum, optional spectrum annotation
- mz feature output: m/z in rows - for each m/z: name, m/z, mean, median, standard deviation (sd), relative standard deviation in percent (sd/mean*100, reported in the column sample_sem), sum of all intensities per m/z, number of peaks (intensity > 0) per m/z
- summarized intensities: pixel annotations will be used to group spectra into annotation groups and calculate mean, median and sd of the intensities per group

        ]]>
    </help>
    <expand macro="citations"/>
</tool>