comparison data_exporter.xml @ 2:e30d8b72415f draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit f127be2141cf22e269c85282d226eb16fe14a9c1
author galaxyp
date Fri, 15 Feb 2019 10:17:43 -0500
parents 28ba52c9548c
children d94770c22f13
comparison
equal deleted inserted replaced
1:65ef413a35f3 2:e30d8b72415f
1 <tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.0"> 1 <tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.1">
2 <description> 2 <description>
3 exports imzML and Analyze7.5 to tabular files 3 exports imzML and Analyze7.5 to tabular files
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>macros.xml</import> 6 <import>macros.xml</import>
20 20
21 ################################# load libraries and read file ################# 21 ################################# load libraries and read file #################
22 22
23 library(Cardinal) 23 library(Cardinal)
24 24
25 @READING_MSIDATA@ 25 @READING_MSIDATA_INRAM@
26 26
27 npeaks= sum(spectra(msidata)[]>0, na.rm=TRUE) 27 ## to make sure that processed files work as well:
28 28 iData(msidata) = iData(msidata)[]
29 if (npeaks > 0){
30 29
31 ###################### Intensity matrix output ################################ 30 ###################### Intensity matrix output ################################
32 31
33 #if "int_matrix" in str($output_options).split(","): 32 #if "int_matrix" in str($output_options).split(","):
34 print("intensity matrix output") 33 print("intensity matrix output")
35 34
36 spectramatrix = spectra(msidata)[]
37 mz_names = gsub(" = ", "_", names(features(msidata))) 35 mz_names = gsub(" = ", "_", names(features(msidata)))
38 mz_names = gsub("/", "", mz_names) 36 mz_names = gsub("/", "", mz_names)
39 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) 37 pixel_names = gsub(", y = ", "_", names(pixels(msidata)))
40 pixel_names = gsub(" = ", "y_", pixel_names) 38 pixel_names = gsub(" = ", "y_", pixel_names)
41 39
42 spectramatrix = cbind(mz_names,spectramatrix) 40 spectramatrix = cbind(mz_names,spectra(msidata)[])
43 newmatrix = rbind(c("mz_name", pixel_names), spectramatrix) 41 newmatrix = rbind(c("mz_name", pixel_names), spectramatrix)
44 write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") 42 write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
43 ## free up RAM space in case further steps will be run:
44 rm(newmatrix)
45 rm(spectramatrix)
46 gc()
45 47
46 #end if 48 #end if
47 49
48 50
49 ############################## m/z feature output ########################## 51 ############################## m/z feature output ##########################
57 full_sample_mean = apply(spectra(msidata)[],1,mean, na.rm=TRUE) 59 full_sample_mean = apply(spectra(msidata)[],1,mean, na.rm=TRUE)
58 full_sample_median = apply(spectra(msidata)[],1,median, na.rm=TRUE) 60 full_sample_median = apply(spectra(msidata)[],1,median, na.rm=TRUE)
59 full_sample_sd = apply(spectra(msidata)[],1,sd, na.rm=TRUE) 61 full_sample_sd = apply(spectra(msidata)[],1,sd, na.rm=TRUE)
60 full_sample_sem = full_sample_sd/full_sample_mean*100 62 full_sample_sem = full_sample_sd/full_sample_mean*100
61 ## npeaks and sum of all intensities per spectrum and mz 63 ## npeaks and sum of all intensities per spectrum and mz
64 npeaks= sum(spectra(msidata)[]>0, na.rm=TRUE)
62 mzTIC = rowSums(spectra(msidata)[], na.rm=TRUE) ## calculate intensity sum for each m/z 65 mzTIC = rowSums(spectra(msidata)[], na.rm=TRUE) ## calculate intensity sum for each m/z
63 peakspermz = rowSums(spectra(msidata)[] > 0, na.rm=TRUE) ## calculate number of intensities > 0 for each m/z (max = number of spectra) 66 peakspermz = rowSums(spectra(msidata)[] > 0, na.rm=TRUE) ## calculate number of intensities > 0 for each m/z (max = number of spectra)
64 67
65 ## combine into dataframe, order is the same for all vectors 68 ## combine into dataframe, order is the same for all vectors
66 mz_df = data.frame(mz_names, mz(msidata), full_sample_mean, full_sample_median, full_sample_sd, full_sample_sem, mzTIC, peakspermz) 69 mz_df = data.frame(mz_names, mz(msidata), full_sample_mean, full_sample_median, full_sample_sd, full_sample_sem, mzTIC, peakspermz)
145 148
146 ## coordinates 149 ## coordinates
147 xycoordinates = coord(msidata)[,1:2] 150 xycoordinates = coord(msidata)[,1:2]
148 151
149 ## pixel name 152 ## pixel name
150 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) 153 pixel_names = paste0("xy_", xycoordinates\$x, "_", xycoordinates\$y)
151 pixel_names = gsub(" = ", "y_", pixel_names)
152 154
153 ## pixel order 155 ## pixel order
154 pixelxyarray=1:length(pixels(msidata)) 156 pixelxyarray=1:length(pixels(msidata))
155 157
156 ## number of peaks per spectrum: every intensity value > 0 counts as peak 158 ## number of peaks per spectrum: every intensity value > 0 counts as peak
157 peaksperpixel = colSums(spectra(msidata)[]> 0, na.rm=TRUE) 159 peaksperpixel = apply(spectra(msidata)[]> 0, 2, sum, na.rm=TRUE)
158 160
159 ## Total ion chromatogram per spectrum 161 ## Total ion chromatogram per spectrum
160 TICs = round(colSums(spectra(msidata)[], na.rm=TRUE), digits = 2) 162 TICs = round(apply(spectra(msidata)[],2, sum, na.rm=TRUE), digits = 2)
163
164 ## Median ion intensity per spectrum
165 med_int = round(apply(spectra(msidata)[], 2, median, na.rm=TRUE), digits = 2)
166
167 ## Maximum ion intensity per spectrum
168 max_int = round(apply(spectra(msidata)[], 2, max, na.rm=TRUE), digits = 2)
161 169
162 ## m/z with the highest intensity per spectrum 170 ## m/z with the highest intensity per spectrum
163 highestmz = apply(spectra(msidata)[],2,which.max) 171 highestmz = apply(spectra(msidata)[],2,which.max)
164 highestmz_data = mz(msidata)[highestmz] 172 highestmz_data = mz(msidata)[highestmz]
165 173
166 ## Combine into dataframe; order is the same for all vectors 174 ## Combine into dataframe; order is the same for all vectors
167 spectra_df = data.frame(pixel_names, xycoordinates, pixelxyarray, peaksperpixel, TICs, highestmz_data) 175 spectra_df = data.frame(pixel_names, xycoordinates, pixelxyarray, peaksperpixel, med_int, TICs, max_int, highestmz_data)
168 colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz") 176 colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz")
169 177
170 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": 178 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants":
171 179
172 calibrant_list = read.delim("$counting_calibrants.mz_tabular", header = $counting_calibrants.feature_header, na.strings=c("","NA"), stringsAsFactors = FALSE) 180 calibrant_list = read.delim("$counting_calibrants.mz_tabular", header = $counting_calibrants.feature_header, na.strings=c("","NA"), stringsAsFactors = FALSE)
173 calibrant_list = calibrant_list[,$counting_calibrants.feature_column, drop=FALSE] 181 calibrant_list = calibrant_list[,$counting_calibrants.feature_column, drop=FALSE]
189 197
190 for (mass in 1:length(inputcalibrantmasses)){ 198 for (mass in 1:length(inputcalibrantmasses)){
191 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] 199 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
192 if (nrow(filtered_data) > 1 & sum(spectra(filtered_data)[],na.rm=TRUE) > 0){ 200 if (nrow(filtered_data) > 1 & sum(spectra(filtered_data)[],na.rm=TRUE) > 0){
193 ## intensity of all m/z > 0 201 ## intensity of all m/z > 0
194 intensity_sum = colSums(spectra(filtered_data)[], na.rm=TRUE) > 0 202 intensity_sum = apply(spectra(filtered_data)[],2,sum, na.rm=TRUE) > 0
203
195 }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data)[], na.rm=TRUE) > 0){ 204 }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data)[], na.rm=TRUE) > 0){
196 ## intensity of only m/z > 0 205 ## intensity of only m/z > 0
197 intensity_sum = spectra(filtered_data)[] > 0 206 intensity_sum = spectra(filtered_data)[] > 0
198 }else{ 207 }else{
199 intensity_sum = rep(FALSE, ncol(filtered_data))} 208 intensity_sum = rep(FALSE, ncol(filtered_data))}
200 ## for each pixel add sum of intensities > 0 in the given m/z range 209 ## for each pixel add sum of intensities > 0 in the given m/z range
201 pixelmatrix = rbind(pixelmatrix, intensity_sum) 210 pixelmatrix = rbind(pixelmatrix, intensity_sum)
202 } 211 }
203 ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE) 212 ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE)
204 countvector= as.factor(colSums(pixelmatrix, na.rm=TRUE)) 213 countvector= as.factor(apply(pixelmatrix, 2,sum,na.rm=TRUE))
214
205 }else{countvector = rep(0,ncol(msidata))} 215 }else{countvector = rep(0,ncol(msidata))}
206 countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts 216 countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts
207 colnames(countdf) = c("x_values", "y_values", "input m/z count") 217 colnames(countdf) = c("x_values", "y_values", "m/z count")
208 spectra_df = merge(spectra_df, countdf, by=c("x_values", "y_values")) 218 spectra_df = merge(spectra_df, countdf, by=c("x_values", "y_values"))
209 219
210 ## sort columns to have spectra_names as rowname in first column 220 ## sort columns to have spectra_names as rowname in first column
211 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz", "input m/z count")] 221 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "m/z count")]
212 222
213 #end if 223 #end if
214 #if str($tabular_annotation.load_annotation) == 'yes_annotation': 224 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
215 225
216 colnames(annotation_input) = c("x_values", "y_values", "annotation") 226 colnames(annotation_input) = c("x_values", "y_values", "annotation")
217 spectra_df = merge(annotation_input,spectra_df, by=c("x_values", "y_values")) 227 spectra_df = merge(annotation_input,spectra_df, by=c("x_values", "y_values"))
218 228
219 ## sort columns to have spectra_names as rowname in first column 229 ## sort columns to have spectra_names as rowname in first column
220 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": 230 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants":
221 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz", "input m/z count", "annotation")] 231 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "m/z count", "annotation")]
222 #else 232 #else
223 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz", "annotation")] 233 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "annotation")]
224 #end if 234 #end if
225 235
226 #end if 236 #end if
227 ## sort rows according to original pixel order 237 ## sort rows according to original pixel order
228 spectra_df = spectra_df[match(pixel_names, spectra_df\$spectra_names),] 238 spectra_df = spectra_df[match(pixel_names, spectra_df\$spectra_names),]
229 239
230 ## Create list and output tabular 240 ## Create list and output tabular
231 write.table(spectra_df, file="$pixel_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 241 write.table(spectra_df, file="$pixel_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
232 #end if 242 #end if
233
234
235 }else{
236 print("file has no features or pixels left")
237 }
238 243
239 244
240 ]]></configfile> 245 ]]></configfile>
241 </configfiles> 246 </configfiles>
242 <inputs> 247 <inputs>
245 <option value="int_matrix" selected="True" >intensity matrix</option> 250 <option value="int_matrix" selected="True" >intensity matrix</option>
246 <option value="mz_tabular">mz feature output</option> 251 <option value="mz_tabular">mz feature output</option>
247 <option value="pixel_tabular">pixel output</option> 252 <option value="pixel_tabular">pixel output</option>
248 </param> 253 </param>
249 <conditional name="counting_calibrants"> 254 <conditional name="counting_calibrants">
250 <param name="pixel_with_calibrants" type="select" label="Add number of m/z of interest per spectrum to pixel output"> 255 <param name="pixel_with_calibrants" type="select" label="Use file with m/z of interest to calculate their occurrence in each spectrum">
251 <option value="no_calibrants" selected="True">no</option> 256 <option value="no_calibrants" selected="True">no</option>
252 <option value="yes_calibrants">yes</option> 257 <option value="yes_calibrants">yes</option>
253 </param> 258 </param>
254 <when value="no_calibrants"/> 259 <when value="no_calibrants"/>
255 <when value="yes_calibrants"> 260 <when value="yes_calibrants">
347 @MZ_TABULAR_INPUT_DESCRIPTION@ 352 @MZ_TABULAR_INPUT_DESCRIPTION@
348 353
349 **Output options** 354 **Output options**
350 355
351 - intensity matrix: m/z in rows, spectra in columns, filled with intensity values 356 - intensity matrix: m/z in rows, spectra in columns, filled with intensity values
352 - spectra output: spectra in rows - for each spectrum: name, x and y coordinates, order, number of peaks (intensities > 0), total ion chromatogram (TIC), highest m/z feature per spectrum, optional count of input m/z per spectrum, optional spectrum annotation 357 - spectra output: spectra in rows - for each spectrum: name, x and y coordinates, order, number of peaks (intensities > 0), total ion chromatogram (TIC), median intensity, maximum intensity, highest m/z feature per spectrum, optional count of m/z per spectrum, optional spectrum annotation
353 - mz feature output: m/z in rows - for each m/z: name, m/z, mean, median, standard deviation (sd), standard error of the mean (sem), sum of all intensities per m/z, number of peaks (intensity > 0) per m/z 358 - mz feature output: m/z in rows - for each m/z: name, m/z, mean, median, standard deviation (sd), standard error of the mean (sem), sum of all intensities per m/z, number of peaks (intensity > 0) per m/z
354 - summarized intensities: pixel annotations will be used to group spectra into annotation groups and calculate mean, median and sd of the intensities per group 359 - summarized intensities: pixel annotations will be used to group spectra into annotation groups and calculate mean, median and sd of the intensities per group
355 360
356 ]]> 361 ]]>
357 </help> 362 </help>