Mercurial > repos > galaxyp > cardinal_data_exporter
comparison data_exporter.xml @ 7:350a84ea795c draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit f986c51abe33c7f622d429a3c4a79ee24b33c1f3"
author | galaxyp |
---|---|
date | Thu, 23 Apr 2020 08:06:24 -0400 |
parents | e521b5767819 |
children | a5d09f2daf71 |
comparison
equal
deleted
inserted
replaced
6:8da3511c9467 | 7:350a84ea795c |
---|---|
1 <tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.2"> | 1 <tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.0"> |
2 <description> | 2 <description> |
3 exports imzML and Analyze7.5 to tabular files | 3 exports imzML and Analyze7.5 to tabular files |
4 </description> | 4 </description> |
5 <macros> | 5 <macros> |
6 <import>macros.xml</import> | 6 <import>macros.xml</import> |
20 | 20 |
21 ################################# load libraries and read file ################# | 21 ################################# load libraries and read file ################# |
22 | 22 |
23 library(Cardinal) | 23 library(Cardinal) |
24 | 24 |
25 @READING_MSIDATA_INRAM@ | 25 @READING_MSIDATA@ |
26 | 26 |
27 | 27 ## in case RData input is MSImageSet: |
28 if (class(msidata) == "MSImageSet"){ | |
29 msidata = as(msidata, "MSImagingExperiment") | |
30 run(msidata) = "infile" | |
31 } | |
32 | |
33 ## extract spectra matrix once: | |
34 msidata_matrix = as.matrix(iData(msidata)) | |
28 ###################### Intensity matrix output ################################ | 35 ###################### Intensity matrix output ################################ |
29 | 36 |
30 #if "int_matrix" in str($output_options).split(","): | 37 #if "int_matrix" in str($output_options).split(","): |
31 print("intensity matrix output") | 38 print("intensity matrix output") |
32 | 39 |
33 mz_names = gsub(" = ", "_", names(features(msidata))) | 40 mz_names = paste0("mz_", mz(msidata)) |
34 mz_names = gsub("/", "", mz_names) | 41 pixel_names = paste0("xy_", coord(msidata)\$x, "_", coord(msidata)\$y) |
35 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) | 42 |
36 pixel_names = gsub(" = ", "y_", pixel_names) | 43 write.table( |
37 | 44 rbind(c("mz_name", pixel_names), cbind(mz_names,msidata_matrix)), ##create matrix |
38 ##spectramatrix = cbind(mz_names,spectra(msidata)) | 45 file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") |
39 newmatrix = rbind(c("mz_name", pixel_names), cbind(mz_names,spectra(msidata))) | |
40 write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | |
41 ## free up RAM space in case further steps will be run: | 46 ## free up RAM space in case further steps will be run: |
42 rm(newmatrix) | 47 rm(newmatrix) |
43 gc() | 48 gc() |
44 | 49 |
45 #end if | 50 #end if |
47 | 52 |
48 ############################## m/z feature output ########################## | 53 ############################## m/z feature output ########################## |
49 #if "mz_tabular" in str($output_options).split(","): | 54 #if "mz_tabular" in str($output_options).split(","): |
50 print("mz feature output") | 55 print("mz feature output") |
51 | 56 |
52 mz_names = gsub(" = ", "_", names(features(msidata))) | 57 full_sample_mean = apply(msidata_matrix,1,mean, na.rm=TRUE) |
53 mz_names = gsub("/", "", mz_names) | 58 full_sample_sd = apply(msidata_matrix,1,sd, na.rm=TRUE) |
54 | 59 |
55 ## mean, median, sd and SEM intensity per file and mz | 60 mz_df = data.frame(paste0("mz_", mz(msidata)), ##mz names |
56 full_sample_mean = rowMeans(spectra(msidata), na.rm=TRUE) | 61 mz(msidata), ##mz values |
57 full_sample_median = apply(spectra(msidata),1,median, na.rm=TRUE) | 62 full_sample_mean, ##mean |
58 full_sample_sd = apply(spectra(msidata),1,sd, na.rm=TRUE) | 63 apply(msidata_matrix,1,median, na.rm=TRUE), ##median |
59 full_sample_sem = full_sample_sd/full_sample_mean*100 | 64 full_sample_sd, ##sd |
60 ## npeaks and sum of all intensities per spectrum and mz | 65 full_sample_sd/full_sample_mean*100, ##SEM |
61 npeaks= sum(spectra(msidata)>0, na.rm=TRUE) | 66 rowSums(msidata_matrix, na.rm=TRUE), ## calculate intensity sum for each m/z |
62 mzTIC = rowSums(spectra(msidata), na.rm=TRUE) ## calculate intensity sum for each m/z | 67 rowSums(msidata_matrix > 0, na.rm=TRUE)) ## calculate number of intensities > 0 for each m/z (max = number of spectra) |
63 peakspermz = rowSums(spectra(msidata) > 0, na.rm=TRUE) ## calculate number of intensities > 0 for each m/z (max = number of spectra) | 68 |
64 | |
65 ## combine into dataframe, order is the same for all vectors | |
66 mz_df = data.frame(mz_names, mz(msidata), full_sample_mean, full_sample_median, full_sample_sd, full_sample_sem, mzTIC, peakspermz) | |
67 colnames(mz_df) = c("mz_names", "mz", "sample_mean", "sample_median", "sample_sd", "sample_sem", "intensity_sum", "number_peaks") | 69 colnames(mz_df) = c("mz_names", "mz", "sample_mean", "sample_median", "sample_sd", "sample_sem", "intensity_sum", "number_peaks") |
68 write.table(mz_df, file="$feature_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 70 write.table(mz_df, file="$feature_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
69 ## free up RAM space in case further steps will be run: | 71 ## free up RAM space in case further steps will be run: |
70 rm(mz_df) | 72 rm(mz_df) |
71 gc() | 73 gc() |
80 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) | 82 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) |
81 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] | 83 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] |
82 colnames(annotation_input) = c("x", "y", "annotation") | 84 colnames(annotation_input) = c("x", "y", "annotation") |
83 | 85 |
84 ## merge with coordinate information of msidata | 86 ## merge with coordinate information of msidata |
85 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) | 87 msidata_coordinates = data.frame(coord(msidata)\$x, coord(msidata)\$y, c(1:ncol(msidata))) |
86 colnames(msidata_coordinates)[3] = "pixel_index" | 88 colnames(msidata_coordinates) = c("x", "y", "pixel_index") |
87 merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE) | 89 merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE) |
88 merged_annotation[is.na(merged_annotation)] = "NA" | 90 merged_annotation[is.na(merged_annotation)] = "NA" |
89 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] | 91 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] |
90 msidata\$annotation = as.factor(merged_annotation[,4]) | 92 msidata\$annotation = as.factor(merged_annotation[,4]) |
91 | 93 |
92 ## create m/z feature name | 94 ## create m/z feature name |
93 mz_names = gsub(" = ", "_", names(features(msidata))) | 95 mz_names = paste0("mz_", mz(msidata)) |
94 mz_names = gsub("/", "", mz_names) | |
95 | 96 |
96 #if "mean" in str($tabular_annotation.summary_type).split(","): | 97 #if "mean" in str($tabular_annotation.summary_type).split(","): |
97 print("summarized mean") | 98 print("summarized mean") |
98 | 99 |
99 ## calculate mean per annotation group | 100 ## calculate mean per annotation group |
129 | 130 |
130 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) | 131 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) |
131 count = 1 | 132 count = 1 |
132 for (subsample in levels(msidata\$annotation)){ | 133 for (subsample in levels(msidata\$annotation)){ |
133 subsample_pixels = msidata[,msidata\$annotation == subsample] | 134 subsample_pixels = msidata[,msidata\$annotation == subsample] |
134 subsample_calc = apply(spectra(subsample_pixels),1,sd, na.rm=TRUE) | 135 subsample_calc = apply(as.matrix(spectra(subsample_pixels)),1,sd, na.rm=TRUE) |
135 sample_matrix = cbind(sample_matrix, subsample_calc) | 136 sample_matrix = cbind(sample_matrix, subsample_calc) |
136 count = count+1} | 137 count = count+1} |
137 sample_matrix_sd = cbind(mz_names,sample_matrix) | 138 sample_matrix_sd = cbind(mz_names,sample_matrix) |
138 sample_matrix_sd = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_sd) | 139 sample_matrix_sd = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_sd) |
139 write.table(sample_matrix_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | 140 write.table(sample_matrix_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") |
145 ############################ spectra (pixel) output ############################ | 146 ############################ spectra (pixel) output ############################ |
146 #if "pixel_tabular" in str($output_options).split(","): | 147 #if "pixel_tabular" in str($output_options).split(","): |
147 print("pixel output") | 148 print("pixel output") |
148 | 149 |
149 ## coordinates | 150 ## coordinates |
150 xycoordinates = coord(msidata)[,1:2] | 151 xycoordinates = data.frame(coord(msidata)\$x, coord(msidata)\$y) |
152 colnames(xycoordinates) = c("x", "y") | |
151 | 153 |
152 ## pixel name | 154 ## pixel name |
153 pixel_names = paste0("xy_", xycoordinates\$x, "_", xycoordinates\$y) | 155 pixel_names = paste0("xy_", coord(msidata)\$x, "_", coord(msidata)\$y) |
154 | |
155 ## pixel order | |
156 pixelxyarray=1:length(pixels(msidata)) | |
157 | |
158 ## number of peaks per spectrum: every intensity value > 0 counts as peak | |
159 peaksperpixel = colSums(spectra(msidata)>0, na.rm=TRUE) | |
160 | |
161 ## Total ion current (TIC) per spectrum | |
162 TICs = round(colSums(spectra(msidata), na.rm=TRUE), digits = 2) | |
163 | |
164 ## Median ion intensity per spectrum | |
165 med_int = round(apply(spectra(msidata), 2, median, na.rm=TRUE), digits = 2) | |
166 | |
167 ## Maximum ion intensity per spectrum | |
168 max_int = round(apply(spectra(msidata), 2, max, na.rm=TRUE), digits = 2) | |
169 | |
170 ## Highest m/z per spectrum | |
171 highestmz = apply(spectra(msidata),2,which.max) | |
172 highestmz_data = mz(msidata)[highestmz] | |
173 | 156 |
174 ## Combine into dataframe; order is the same for all vectors | 157 ## Combine into dataframe; order is the same for all vectors |
175 spectra_df = data.frame(pixel_names, xycoordinates, pixelxyarray, peaksperpixel, med_int, TICs, max_int, highestmz_data) | 158 spectra_df = data.frame(pixel_names, |
159 xycoordinates, | |
160 1:length(pixels(msidata)), ##pixel order | |
161 colSums(msidata_matrix>0, na.rm=TRUE), ##peaks per pixel | |
162 round(apply(msidata_matrix, 2, median, na.rm=TRUE), digits = 2), ## median intensity | |
163 round(colSums(msidata_matrix, na.rm=TRUE), digits = 2), ##TICs | |
164 round(apply(msidata_matrix, 2, max, na.rm=TRUE), digits = 2), ##max intensity | |
165 mz(msidata)[apply(msidata_matrix,2,which.max) ]) ##highest mz | |
166 | |
176 colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz") | 167 colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz") |
177 | 168 |
178 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": | 169 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": |
179 | 170 |
180 calibrant_list = read.delim("$counting_calibrants.mz_tabular", header = $counting_calibrants.feature_header, na.strings=c("","NA"), stringsAsFactors = FALSE) | 171 calibrant_list = read.delim("$counting_calibrants.mz_tabular", header = $counting_calibrants.feature_header, na.strings=c("","NA"), stringsAsFactors = FALSE) |
181 calibrant_list = calibrant_list[,$counting_calibrants.feature_column, drop=FALSE] | 172 calibrant_list = calibrant_list[,$counting_calibrants.feature_column, drop=FALSE] |
173 | |
182 ### calculate how many input calibrant m/z are valid: | 174 ### calculate how many input calibrant m/z are valid: |
183 inputcalibrants = calibrant_list[calibrant_list[,1]>min(mz(msidata)) & calibrant_list[,1]<max(mz(msidata)),,drop = FALSE] | 175 inputcalibrants = calibrant_list[calibrant_list[,1]>min(mz(msidata)) & calibrant_list[,1]<max(mz(msidata)),,drop = FALSE] |
176 | |
184 inputcalibrantmasses = inputcalibrants[,1] | 177 inputcalibrantmasses = inputcalibrants[,1] |
185 | 178 |
186 ##QC plot number 2) Number of calibrants per spectrum | 179 ##QC plot number 2) Number of calibrants per spectrum |
187 | 180 |
188 ## matrix with calibrants in columns and in rows if there is peak intensity in range or not | 181 ## matrix with calibrants in columns and in rows if there is peak intensity in range or not |
212 ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE) | 205 ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE) |
213 countvector= as.factor(apply(pixelmatrix, 2,sum,na.rm=TRUE)) | 206 countvector= as.factor(apply(pixelmatrix, 2,sum,na.rm=TRUE)) |
214 | 207 |
215 }else{countvector = rep(0,ncol(msidata))} | 208 }else{countvector = rep(0,ncol(msidata))} |
216 countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts | 209 countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts |
217 colnames(countdf) = c("x_values", "y_values", "m/z count") | 210 colnames(countdf) = c("x_values", "y_values", "mz_count") |
218 spectra_df = merge(spectra_df, countdf, by=c("x_values", "y_values")) | 211 spectra_df = merge(spectra_df, countdf, by=c("x_values", "y_values")) |
219 | 212 |
220 ## sort columns to have spectra_names as rowname in first column | 213 ## sort columns to have spectra_names as rowname in first column |
221 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "m/z count")] | 214 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "mz_count")] |
222 | 215 |
223 #end if | 216 #end if |
224 #if str($tabular_annotation.load_annotation) == 'yes_annotation': | 217 #if str($tabular_annotation.load_annotation) == 'yes_annotation': |
225 | 218 |
226 colnames(annotation_input) = c("x_values", "y_values", "annotation") | 219 colnames(annotation_input) = c("x_values", "y_values", "annotation") |
227 spectra_df = merge(spectra_df,annotation_input, by=c("x_values", "y_values"), all.x=TRUE) | 220 spectra_df = merge(spectra_df,annotation_input, by=c("x_values", "y_values"), all.x=TRUE) |
228 | 221 |
229 ## sort columns to have spectra_names as rowname in first column | 222 ## sort columns to have spectra_names as rowname in first column |
230 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": | 223 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": |
231 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "m/z count", "annotation")] | 224 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "mz_count", "annotation")] |
232 #else | 225 #else |
233 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "annotation")] | 226 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "annotation")] |
234 #end if | 227 #end if |
235 | 228 |
236 #end if | 229 #end if |
333 </conditional> | 326 </conditional> |
334 <output name="intensity_matrix" file="int_matrix3.tabular"/> | 327 <output name="intensity_matrix" file="int_matrix3.tabular"/> |
335 <output name="feature_output" file="features_out3.tabular"/> | 328 <output name="feature_output" file="features_out3.tabular"/> |
336 <output name="pixel_output" file="pixel_out3.tabular"/> | 329 <output name="pixel_output" file="pixel_out3.tabular"/> |
337 </test> | 330 </test> |
331 <test expect_num_outputs="2"> | |
332 <expand macro="processed_infile_imzml"/> | |
333 <conditional name="processed_cond"> | |
334 <param name="processed_file" value="processed"/> | |
335 <param name="accuracy" value="100"/> | |
336 <param name="units" value="ppm"/> | |
337 </conditional> | |
338 <param name="output_options" value="pixel_tabular,mz_tabular"/> | |
339 <conditional name="counting_calibrants"> | |
340 <param name="pixel_with_calibrants" value="yes_calibrants"/> | |
341 <param name="mz_tabular" ftype="tabular" value = "featuresofinterest5.tabular"/> | |
342 <param name="feature_column" value="1"/> | |
343 <param name="feature_header" value="False"/> | |
344 <param name="plusminus_ppm" value="200"/> | |
345 </conditional> | |
346 <output name="feature_output" file="features_out4.tabular"/> | |
347 <output name="pixel_output" file="pixel_out4.tabular"/> | |
348 </test> | |
338 </tests> | 349 </tests> |
339 <help> | 350 <help> |
340 <![CDATA[ | 351 <![CDATA[ |
341 | 352 |
342 @CARDINAL_DESCRIPTION@ | 353 @CARDINAL_DESCRIPTION@ |