comparison data_exporter.xml @ 7:350a84ea795c draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit f986c51abe33c7f622d429a3c4a79ee24b33c1f3"
author galaxyp
date Thu, 23 Apr 2020 08:06:24 -0400
parents e521b5767819
children a5d09f2daf71
comparison
equal deleted inserted replaced
6:8da3511c9467 7:350a84ea795c
1 <tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.2"> 1 <tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.0">
2 <description> 2 <description>
3 exports imzML and Analyze7.5 to tabular files 3 exports imzML and Analyze7.5 to tabular files
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>macros.xml</import> 6 <import>macros.xml</import>
20 20
21 ################################# load libraries and read file ################# 21 ################################# load libraries and read file #################
22 22
23 library(Cardinal) 23 library(Cardinal)
24 24
25 @READING_MSIDATA_INRAM@ 25 @READING_MSIDATA@
26 26
27 27 ## in case RData input is MSImageSet:
28 if (class(msidata) == "MSImageSet"){
29 msidata = as(msidata, "MSImagingExperiment")
30 run(msidata) = "infile"
31 }
32
33 ## extract spectra matrix once:
34 msidata_matrix = as.matrix(iData(msidata))
28 ###################### Intensity matrix output ################################ 35 ###################### Intensity matrix output ################################
29 36
30 #if "int_matrix" in str($output_options).split(","): 37 #if "int_matrix" in str($output_options).split(","):
31 print("intensity matrix output") 38 print("intensity matrix output")
32 39
33 mz_names = gsub(" = ", "_", names(features(msidata))) 40 mz_names = paste0("mz_", mz(msidata))
34 mz_names = gsub("/", "", mz_names) 41 pixel_names = paste0("xy_", coord(msidata)\$x, "_", coord(msidata)\$y)
35 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) 42
36 pixel_names = gsub(" = ", "y_", pixel_names) 43 write.table(
37 44 rbind(c("mz_name", pixel_names), cbind(mz_names,msidata_matrix)), ##create matrix
38 ##spectramatrix = cbind(mz_names,spectra(msidata)) 45 file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
39 newmatrix = rbind(c("mz_name", pixel_names), cbind(mz_names,spectra(msidata)))
40 write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
41 ## free up RAM space in case further steps will be run: 46 ## free up RAM space in case further steps will be run:
42 rm(newmatrix) 47 rm(newmatrix)
43 gc() 48 gc()
44 49
45 #end if 50 #end if
47 52
48 ############################## m/z feature output ########################## 53 ############################## m/z feature output ##########################
49 #if "mz_tabular" in str($output_options).split(","): 54 #if "mz_tabular" in str($output_options).split(","):
50 print("mz feature output") 55 print("mz feature output")
51 56
52 mz_names = gsub(" = ", "_", names(features(msidata))) 57 full_sample_mean = apply(msidata_matrix,1,mean, na.rm=TRUE)
53 mz_names = gsub("/", "", mz_names) 58 full_sample_sd = apply(msidata_matrix,1,sd, na.rm=TRUE)
54 59
55 ## mean, median, sd and SEM intensity per file and mz 60 mz_df = data.frame(paste0("mz_", mz(msidata)), ##mz names
56 full_sample_mean = rowMeans(spectra(msidata), na.rm=TRUE) 61 mz(msidata), ##mz values
57 full_sample_median = apply(spectra(msidata),1,median, na.rm=TRUE) 62 full_sample_mean, ##mean
58 full_sample_sd = apply(spectra(msidata),1,sd, na.rm=TRUE) 63 apply(msidata_matrix,1,median, na.rm=TRUE), ##median
59 full_sample_sem = full_sample_sd/full_sample_mean*100 64 full_sample_sd, ##sd
60 ## npeaks and sum of all intensities per spectrum and mz 65 full_sample_sd/full_sample_mean*100, ##SEM
61 npeaks= sum(spectra(msidata)>0, na.rm=TRUE) 66 rowSums(msidata_matrix, na.rm=TRUE), ## calculate intensity sum for each m/z
62 mzTIC = rowSums(spectra(msidata), na.rm=TRUE) ## calculate intensity sum for each m/z 67 rowSums(msidata_matrix > 0, na.rm=TRUE)) ## calculate number of intensities > 0 for each m/z (max = number of spectra)
63 peakspermz = rowSums(spectra(msidata) > 0, na.rm=TRUE) ## calculate number of intensities > 0 for each m/z (max = number of spectra) 68
64
65 ## combine into dataframe, order is the same for all vectors
66 mz_df = data.frame(mz_names, mz(msidata), full_sample_mean, full_sample_median, full_sample_sd, full_sample_sem, mzTIC, peakspermz)
67 colnames(mz_df) = c("mz_names", "mz", "sample_mean", "sample_median", "sample_sd", "sample_sem", "intensity_sum", "number_peaks") 69 colnames(mz_df) = c("mz_names", "mz", "sample_mean", "sample_median", "sample_sd", "sample_sem", "intensity_sum", "number_peaks")
68 write.table(mz_df, file="$feature_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 70 write.table(mz_df, file="$feature_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
69 ## free up RAM space in case further steps will be run: 71 ## free up RAM space in case further steps will be run:
70 rm(mz_df) 72 rm(mz_df)
71 gc() 73 gc()
80 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) 82 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
81 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] 83 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
82 colnames(annotation_input) = c("x", "y", "annotation") 84 colnames(annotation_input) = c("x", "y", "annotation")
83 85
84 ## merge with coordinate information of msidata 86 ## merge with coordinate information of msidata
85 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) 87 msidata_coordinates = data.frame(coord(msidata)\$x, coord(msidata)\$y, c(1:ncol(msidata)))
86 colnames(msidata_coordinates)[3] = "pixel_index" 88 colnames(msidata_coordinates) = c("x", "y", "pixel_index")
87 merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE) 89 merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE)
88 merged_annotation[is.na(merged_annotation)] = "NA" 90 merged_annotation[is.na(merged_annotation)] = "NA"
89 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] 91 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
90 msidata\$annotation = as.factor(merged_annotation[,4]) 92 msidata\$annotation = as.factor(merged_annotation[,4])
91 93
92 ## create m/z feature name 94 ## create m/z feature name
93 mz_names = gsub(" = ", "_", names(features(msidata))) 95 mz_names = paste0("mz_", mz(msidata))
94 mz_names = gsub("/", "", mz_names)
95 96
96 #if "mean" in str($tabular_annotation.summary_type).split(","): 97 #if "mean" in str($tabular_annotation.summary_type).split(","):
97 print("summarized mean") 98 print("summarized mean")
98 99
99 ## calculate mean per annotation group 100 ## calculate mean per annotation group
129 130
130 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) 131 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
131 count = 1 132 count = 1
132 for (subsample in levels(msidata\$annotation)){ 133 for (subsample in levels(msidata\$annotation)){
133 subsample_pixels = msidata[,msidata\$annotation == subsample] 134 subsample_pixels = msidata[,msidata\$annotation == subsample]
134 subsample_calc = apply(spectra(subsample_pixels),1,sd, na.rm=TRUE) 135 subsample_calc = apply(as.matrix(spectra(subsample_pixels)),1,sd, na.rm=TRUE)
135 sample_matrix = cbind(sample_matrix, subsample_calc) 136 sample_matrix = cbind(sample_matrix, subsample_calc)
136 count = count+1} 137 count = count+1}
137 sample_matrix_sd = cbind(mz_names,sample_matrix) 138 sample_matrix_sd = cbind(mz_names,sample_matrix)
138 sample_matrix_sd = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_sd) 139 sample_matrix_sd = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_sd)
139 write.table(sample_matrix_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") 140 write.table(sample_matrix_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
145 ############################ spectra (pixel) output ############################ 146 ############################ spectra (pixel) output ############################
146 #if "pixel_tabular" in str($output_options).split(","): 147 #if "pixel_tabular" in str($output_options).split(","):
147 print("pixel output") 148 print("pixel output")
148 149
149 ## coordinates 150 ## coordinates
150 xycoordinates = coord(msidata)[,1:2] 151 xycoordinates = data.frame(coord(msidata)\$x, coord(msidata)\$y)
152 colnames(xycoordinates) = c("x", "y")
151 153
152 ## pixel name 154 ## pixel name
153 pixel_names = paste0("xy_", xycoordinates\$x, "_", xycoordinates\$y) 155 pixel_names = paste0("xy_", coord(msidata)\$x, "_", coord(msidata)\$y)
154
155 ## pixel order
156 pixelxyarray=1:length(pixels(msidata))
157
158 ## number of peaks per spectrum: every intensity value > 0 counts as peak
159 peaksperpixel = colSums(spectra(msidata)>0, na.rm=TRUE)
160
161 ## Total ion chromatogram per spectrum
162 TICs = round(colSums(spectra(msidata), na.rm=TRUE), digits = 2)
163
164 ## Median ion intensity per spectrum
165 med_int = round(apply(spectra(msidata), 2, median, na.rm=TRUE), digits = 2)
166
167 ## Maximum ion intensity per spectrum
168 max_int = round(apply(spectra(msidata), 2, max, na.rm=TRUE), digits = 2)
169
170 ## Highest m/z per spectrum
171 highestmz = apply(spectra(msidata),2,which.max)
172 highestmz_data = mz(msidata)[highestmz]
173 156
174 ## Combine into dataframe; order is the same for all vectors 157 ## Combine into dataframe; order is the same for all vectors
175 spectra_df = data.frame(pixel_names, xycoordinates, pixelxyarray, peaksperpixel, med_int, TICs, max_int, highestmz_data) 158 spectra_df = data.frame(pixel_names,
159 xycoordinates,
160 1:length(pixels(msidata)), ##pixel order
161 colSums(msidata_matrix>0, na.rm=TRUE), ##peaks per pixel
162 round(apply(msidata_matrix, 2, median, na.rm=TRUE), digits = 2), ## median intensity
163 round(colSums(msidata_matrix, na.rm=TRUE), digits = 2), ##TICs
164 round(apply(msidata_matrix, 2, max, na.rm=TRUE), digits = 2), ##max intensity
165 mz(msidata)[apply(msidata_matrix,2,which.max) ]) ##highest mz
166
176 colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz") 167 colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz")
177 168
178 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": 169 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants":
179 170
180 calibrant_list = read.delim("$counting_calibrants.mz_tabular", header = $counting_calibrants.feature_header, na.strings=c("","NA"), stringsAsFactors = FALSE) 171 calibrant_list = read.delim("$counting_calibrants.mz_tabular", header = $counting_calibrants.feature_header, na.strings=c("","NA"), stringsAsFactors = FALSE)
181 calibrant_list = calibrant_list[,$counting_calibrants.feature_column, drop=FALSE] 172 calibrant_list = calibrant_list[,$counting_calibrants.feature_column, drop=FALSE]
173
182 ### calculate how many input calibrant m/z are valid: 174 ### calculate how many input calibrant m/z are valid:
183 inputcalibrants = calibrant_list[calibrant_list[,1]>min(mz(msidata)) & calibrant_list[,1]<max(mz(msidata)),,drop = FALSE] 175 inputcalibrants = calibrant_list[calibrant_list[,1]>min(mz(msidata)) & calibrant_list[,1]<max(mz(msidata)),,drop = FALSE]
176
184 inputcalibrantmasses = inputcalibrants[,1] 177 inputcalibrantmasses = inputcalibrants[,1]
185 178
186 ##QC plot number 2) Number of calibrants per spectrum 179 ##QC plot number 2) Number of calibrants per spectrum
187 180
188 ## matrix with calibrants in columns and in rows if there is peak intensity in range or not 181 ## matrix with calibrants in columns and in rows if there is peak intensity in range or not
212 ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE) 205 ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE)
213 countvector= as.factor(apply(pixelmatrix, 2,sum,na.rm=TRUE)) 206 countvector= as.factor(apply(pixelmatrix, 2,sum,na.rm=TRUE))
214 207
215 }else{countvector = rep(0,ncol(msidata))} 208 }else{countvector = rep(0,ncol(msidata))}
216 countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts 209 countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts
217 colnames(countdf) = c("x_values", "y_values", "m/z count") 210 colnames(countdf) = c("x_values", "y_values", "mz_count")
218 spectra_df = merge(spectra_df, countdf, by=c("x_values", "y_values")) 211 spectra_df = merge(spectra_df, countdf, by=c("x_values", "y_values"))
219 212
220 ## sort columns to have spectra_names as rowname in first column 213 ## sort columns to have spectra_names as rowname in first column
221 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "m/z count")] 214 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "mz_count")]
222 215
223 #end if 216 #end if
224 #if str($tabular_annotation.load_annotation) == 'yes_annotation': 217 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
225 218
226 colnames(annotation_input) = c("x_values", "y_values", "annotation") 219 colnames(annotation_input) = c("x_values", "y_values", "annotation")
227 spectra_df = merge(spectra_df,annotation_input, by=c("x_values", "y_values"), all.x=TRUE) 220 spectra_df = merge(spectra_df,annotation_input, by=c("x_values", "y_values"), all.x=TRUE)
228 221
229 ## sort columns to have spectra_names as rowname in first column 222 ## sort columns to have spectra_names as rowname in first column
230 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": 223 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants":
231 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "m/z count", "annotation")] 224 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "mz_count", "annotation")]
232 #else 225 #else
233 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "annotation")] 226 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "annotation")]
234 #end if 227 #end if
235 228
236 #end if 229 #end if
333 </conditional> 326 </conditional>
334 <output name="intensity_matrix" file="int_matrix3.tabular"/> 327 <output name="intensity_matrix" file="int_matrix3.tabular"/>
335 <output name="feature_output" file="features_out3.tabular"/> 328 <output name="feature_output" file="features_out3.tabular"/>
336 <output name="pixel_output" file="pixel_out3.tabular"/> 329 <output name="pixel_output" file="pixel_out3.tabular"/>
337 </test> 330 </test>
331 <test expect_num_outputs="2">
332 <expand macro="processed_infile_imzml"/>
333 <conditional name="processed_cond">
334 <param name="processed_file" value="processed"/>
335 <param name="accuracy" value="100"/>
336 <param name="units" value="ppm"/>
337 </conditional>
338 <param name="output_options" value="pixel_tabular,mz_tabular"/>
339 <conditional name="counting_calibrants">
340 <param name="pixel_with_calibrants" value="yes_calibrants"/>
341 <param name="mz_tabular" ftype="tabular" value = "featuresofinterest5.tabular"/>
342 <param name="feature_column" value="1"/>
343 <param name="feature_header" value="False"/>
344 <param name="plusminus_ppm" value="200"/>
345 </conditional>
346 <output name="feature_output" file="features_out4.tabular"/>
347 <output name="pixel_output" file="pixel_out4.tabular"/>
348 </test>
338 </tests> 349 </tests>
339 <help> 350 <help>
340 <![CDATA[ 351 <![CDATA[
341 352
342 @CARDINAL_DESCRIPTION@ 353 @CARDINAL_DESCRIPTION@