comparison data_exporter.xml @ 0:28ba52c9548c draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 0825a4ccd3ebf4ca8a298326d14f3e7b25ae8415
author galaxyp
date Mon, 01 Oct 2018 01:05:33 -0400
parents
children e30d8b72415f
comparison
equal deleted inserted replaced
-1:000000000000 0:28ba52c9548c
1 <tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.0">
2 <description>
3 exports imzML and Analyze7.5 to tabular files
4 </description>
5 <macros>
6 <import>macros.xml</import>
7 </macros>
8 <expand macro="requirements"/>
9 <command detect_errors="exit_code">
10 <![CDATA[
11
12 @INPUT_LINKING@
13 cat '${cardinal_imzml_exporter}' && ## print the rendered R script to stdout before running it
14 Rscript '${cardinal_imzml_exporter}' ## run the script that is rendered from the configfile of the same name
15
16 ]]>
17 </command>
18 <configfiles>
19 <configfile name="cardinal_imzml_exporter"><![CDATA[
20
21 ################################# load libraries and read file #################
22
23 library(Cardinal)
24
25 @READING_MSIDATA@
26
27 npeaks= sum(spectra(msidata)[]>0, na.rm=TRUE) ## number of intensity values > 0 in the whole dataset, NA values are ignored
28
29 if (npeaks > 0){ ## outputs are only written when at least one intensity is > 0, otherwise only a message is printed
30
31 ###################### Intensity matrix output ################################
32
33 #if "int_matrix" in str($output_options).split(","):
34 print("intensity matrix output")
35
36 ## row labels from the feature names: " = " becomes "_" and "/" is removed
37 feature_labels = gsub("/", "", gsub(" = ", "_", names(features(msidata))))
38 ## column labels from the pixel names: ", y = " becomes "_" and the remaining " = " becomes "y_"
39 spectrum_labels = gsub(" = ", "y_", gsub(", y = ", "_", names(pixels(msidata))))
40
41 ## first column holds the feature labels, first row holds the header; the header is part of the matrix, therefore col.names=FALSE
42 labelled_rows = cbind(feature_labels, spectra(msidata)[])
43 export_table = rbind(c("mz_name", spectrum_labels), labelled_rows)
44 write.table(export_table, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
45
46 #end if
47
48
49 ############################## m/z feature output ##########################
50 #if "mz_tabular" in str($output_options).split(","):
51 print("mz feature output")
52
53 mz_names = gsub("/", "", gsub(" = ", "_", names(features(msidata))))
54 all_intensities = spectra(msidata)[] ## extract the intensity matrix once and reuse it for all statistics below
55
56 ## mean, median, sd and SEM intensity per file and mz, NA intensities are ignored
57 full_sample_mean = apply(all_intensities,1,mean, na.rm=TRUE)
58 full_sample_median = apply(all_intensities,1,median, na.rm=TRUE)
59 full_sample_sd = apply(all_intensities,1,sd, na.rm=TRUE)
60 full_sample_sem = full_sample_sd/sqrt(rowSums(!is.na(all_intensities))) ## standard error of the mean = sd/sqrt(n) with n = non-NA intensities per m/z; the former sd/mean*100 was the coefficient of variation in percent
61 ## npeaks and sum of all intensities per spectrum and mz
62 mzTIC = rowSums(all_intensities, na.rm=TRUE) ## calculate intensity sum for each m/z
63 peakspermz = rowSums(all_intensities > 0, na.rm=TRUE) ## calculate number of intensities > 0 for each m/z (max = number of spectra)
64
65 ## combine into dataframe, order is the same for all vectors
66 mz_df = data.frame(mz_names, mz(msidata), full_sample_mean, full_sample_median, full_sample_sd, full_sample_sem, mzTIC, peakspermz)
67 colnames(mz_df) = c("mz_names", "mz", "sample_mean", "sample_median", "sample_sd", "sample_sem", "intensity_sum", "number_peaks")
68 write.table(mz_df, file="$feature_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
69 #end if
70
71 ###################### summarized m/z feature output #######################
72
73 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
74 print("summarized annotation output")
75
76 ## read the annotation tabular and keep only the x, y and annotation columns selected by the user
77 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
78 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
79 colnames(annotation_input) = c("x", "y", "annotation") ## annotation_input is reused by the pixel output section further down
80
81 ## merge with coordinate information of msidata; assumes the first two coord() columns are named x and y
82 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) ## third column is the running column index of msidata
83 colnames(msidata_coordinates)[3] = "pixel_index"
84 merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE) ## all.x keeps every msidata coordinate, unmatched ones get NA
85 merged_annotation[is.na(merged_annotation)] = "NA" ## the string "NA" becomes a regular factor level, so unannotated pixels form their own group
86 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] ## merge() reorders the rows, restore the msidata order
87 msidata\$annotation = as.factor(merged_annotation[,4]) ## column 4 is the annotation (after x, y, pixel_index)
88
89 ## create m/z feature name, used as first column of every summary table below
90 mz_names = gsub(" = ", "_", names(features(msidata)))
91 mz_names = gsub("/", "", mz_names)
92
93 #if "mean" in str($tabular_annotation.summary_type).split(","):
94 print("summarized mean")
95
96 ## mean intensity of every m/z within each annotation group, NA intensities are ignored
97 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
98 ## one column per annotation level is appended, in the order of levels()
99 for (subsample in levels(msidata\$annotation)){
100 subsample_pixels = msidata[,msidata\$annotation == subsample]
101 subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE)
102 sample_matrix = cbind(sample_matrix, subsample_calc)
103 }
104 sample_matrix_mean = cbind(mz_names,sample_matrix)
105 sample_matrix_mean = rbind(c("mz_name", levels(msidata\$annotation)), sample_matrix_mean)
106 write.table(sample_matrix_mean, file="$summarized_mean", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
107 #end if
108
109 #if "median" in str($tabular_annotation.summary_type).split(","):
110 print("summarized median")
111
112 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
113 ## median intensity of every m/z within each annotation group, one column per level
114 for (subsample in levels(msidata\$annotation)){
115 subsample_pixels = msidata[,msidata\$annotation == subsample]
116 subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE)
117 sample_matrix = cbind(sample_matrix, subsample_calc)
118 }
119 sample_matrix_median = cbind(mz_names,sample_matrix)
120 sample_matrix_median = rbind(c("mz_name", levels(msidata\$annotation)), sample_matrix_median) ## header cell is "mz_name" in all summary tables
121 write.table(sample_matrix_median, file="$summarized_median", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
122 #end if
123
124 #if "sd" in str($tabular_annotation.summary_type).split(","):
125 print("summarized sd")
126
127 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
128 ## standard deviation of every m/z within each annotation group, one column per level
129 for (subsample in levels(msidata\$annotation)){
130 subsample_pixels = msidata[,msidata\$annotation == subsample]
131 subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE)
132 sample_matrix = cbind(sample_matrix, subsample_calc)
133 }
134 sample_matrix_sd = cbind(mz_names,sample_matrix)
135 sample_matrix_sd = rbind(c("mz_name", levels(msidata\$annotation)), sample_matrix_sd) ## header cell is "mz_name" in all summary tables
136 write.table(sample_matrix_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
137 #end if
138
139 #end if
140
141
142 ############################ spectra (pixel) output ############################
143 #if "pixel_tabular" in str($output_options).split(","):
144 print("pixel output")
145
146 ## coordinates
147 xycoordinates = coord(msidata)[,1:2]
148
149 ## pixel name, also used at the end of this section to restore the original row order
150 pixel_names = gsub(", y = ", "_", names(pixels(msidata)))
151 pixel_names = gsub(" = ", "y_", pixel_names)
152
153 ## pixel order
154 pixelxyarray=1:length(pixels(msidata))
155
156 ## number of peaks per spectrum: every intensity value > 0 counts as peak
157 peaksperpixel = colSums(spectra(msidata)[]> 0, na.rm=TRUE)
158
159 ## sum of all intensities per spectrum (TIC), rounded to two digits
160 TICs = round(colSums(spectra(msidata)[], na.rm=TRUE), digits = 2)
161
162 ## m/z with the highest intensity per spectrum; which.max() is empty for a spectrum without any non-NA value, such spectra get NA
163 highestmz = apply(spectra(msidata)[],2,function(spectrum){top = which.max(spectrum); if (length(top) == 0) NA_integer_ else top})
164 highestmz_data = mz(msidata)[highestmz]
165
166 ## Combine into dataframe; order is the same for all vectors
167 spectra_df = data.frame(pixel_names, xycoordinates, pixelxyarray, peaksperpixel, TICs, highestmz_data)
168 colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz")
169
170 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants":
171
172 calibrant_list = read.delim("$counting_calibrants.mz_tabular", header = $counting_calibrants.feature_header, na.strings=c("","NA"), stringsAsFactors = FALSE)
173 calibrant_list = calibrant_list[,$counting_calibrants.feature_column, drop=FALSE]
174 ### keep only input m/z strictly inside the m/z range of the data; which() also drops NA entries (empty cells are read as NA)
175 inputcalibrants = calibrant_list[which(calibrant_list[,1]>min(mz(msidata)) & calibrant_list[,1]<max(mz(msidata))),,drop = FALSE]
176 inputcalibrantmasses = inputcalibrants[,1]
177
178 ## count for every spectrum how many of the input m/z are present
179
180 ## matrix with one row per input m/z and one column per spectrum: TRUE if there is intensity > 0 in the m/z window
181 pixelmatrix = matrix(ncol=ncol(msidata), nrow = 0)
182
183 if (length(inputcalibrantmasses) != 0){
184
185 ## calculate plusminus values in m/z for each calibrant
186 plusminusvalues = rep($counting_calibrants.plusminus_ppm/1000000, length(inputcalibrantmasses))*inputcalibrantmasses
187
188 ## filter for m/z window of each calibrant and calculate if sum of peak intensities > 0
189
190 for (mass in 1:length(inputcalibrantmasses)){
191 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
192 if (nrow(filtered_data) > 1 && sum(spectra(filtered_data)[],na.rm=TRUE) > 0){
193 ## several m/z features in the window: present if their summed intensity is > 0
194 intensity_sum = colSums(spectra(filtered_data)[], na.rm=TRUE) > 0
195 }else if(nrow(filtered_data) == 1 && sum(spectra(filtered_data)[], na.rm=TRUE) > 0){
196 ## exactly one m/z feature in the window: present if its intensity is > 0
197 intensity_sum = spectra(filtered_data)[] > 0
198 }else{
199 intensity_sum = rep(FALSE, ncol(filtered_data))}
200 ## for each pixel add sum of intensities > 0 in the given m/z range
201 pixelmatrix = rbind(pixelmatrix, intensity_sum)
202 }
203 ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE)
204 countvector= as.factor(colSums(pixelmatrix, na.rm=TRUE))
205 }else{countvector = rep(0,ncol(msidata))}
206 countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts
207 colnames(countdf) = c("x_values", "y_values", "input m/z count")
208 spectra_df = merge(spectra_df, countdf, by=c("x_values", "y_values"))
209
210 ## sort columns to have spectra_names as rowname in first column
211 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz", "input m/z count")]
212
213 #end if
214 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
215
216 colnames(annotation_input) = c("x_values", "y_values", "annotation")
217 spectra_df = merge(spectra_df, annotation_input, by=c("x_values", "y_values"), all.x=TRUE) ## left join: spectra without annotation are kept with annotation NA instead of being dropped
218
219 ## sort columns to have spectra_names as rowname in first column
220 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants":
221 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz", "input m/z count", "annotation")]
222 #else
223 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz", "annotation")]
224 #end if
225
226 #end if
227 ## sort rows according to original pixel order, merge() reorders the rows by coordinates
228 spectra_df = spectra_df[match(pixel_names, spectra_df\$spectra_names),]
229
230 ## write the pixel table with a header row
231 write.table(spectra_df, file="$pixel_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
232 #end if
233
234
235 }else{
236 print("file has no features or pixels left")
237 }
238
239
240 ]]></configfile>
241 </configfiles>
242 <inputs>
243 <expand macro="reading_msidata"/> <!-- input file parameters; macro presumably defined in the imported macros.xml -->
244 <param name="output_options" type="select" display="checkboxes" optional="False" multiple="true" label="Multiple output files can be selected">
245 <option value="int_matrix" selected="True" >intensity matrix</option>
246 <option value="mz_tabular">mz feature output</option>
247 <option value="pixel_tabular">pixel output</option>
248 </param> <!-- the selected values are tested one by one in the R configfile and in the output filters -->
249 <conditional name="counting_calibrants"> <!-- only evaluated inside the pixel output section of the script -->
250 <param name="pixel_with_calibrants" type="select" label="Add number of m/z of interest per spectrum to pixel output">
251 <option value="no_calibrants" selected="True">no</option>
252 <option value="yes_calibrants">yes</option>
253 </param>
254 <when value="no_calibrants"/>
255 <when value="yes_calibrants">
256 <expand macro="reading_1_column_mz_tabular" label="For each spectrum the occurrence of the provided m/z values is counted"/> <!-- presumably provides mz_tabular, feature_column and feature_header, which the script reads -->
257 <param name="plusminus_ppm" value="200" type="float" label="ppm range will be added in both directions to input m/z" help="The m/z window is used to search for peaks, if intensity > 0 found in the window the m/z is considered present, if all intensities are 0 the m/z is considered not present"/>
258 </when>
259 </conditional>
260 <conditional name="tabular_annotation"> <!-- enables the per-group summary tables and adds an annotation column to the pixel output -->
261 <param name="load_annotation" type="select" label="Pixel annotation can be used to summarize intensities per annotation group">
262 <option value="no_annotation" selected="True">no</option>
263 <option value="yes_annotation">yes</option>
264 </param>
265 <when value="no_annotation"/>
266 <when value="yes_annotation">
267 <expand macro="reading_pixel_annotations"/> <!-- presumably provides annotation_file, column_x, column_y, column_names and tabular_header, which the script reads -->
268 <param name="summary_type" type="select" display="checkboxes" optional="False" multiple="true" label="Calculation for each m/z and all pixels of a annotation group" help="This step will only work if pixel annotations are provided">
269 <option value="mean">mean</option>
270 <option value="median">median</option>
271 <option value="sd">standard deviation</option>
272 </param>
273 </when>
274 </conditional>
275 </inputs>
276 <outputs> <!-- every dataset is optional; each filter mirrors the condition under which the script writes that file -->
277 <data format="tabular" name="intensity_matrix" label="${tool.name} on ${on_string}: intensity_matrix">
278 <filter>"int_matrix" in output_options</filter>
279 </data>
280 <data format="tabular" name="pixel_output" label="${tool.name} on ${on_string}: spectra">
281 <filter>"pixel_tabular" in output_options</filter>
282 </data>
283 <data format="tabular" name="feature_output" label="${tool.name} on ${on_string}: features">
284 <filter>"mz_tabular" in output_options</filter>
285 </data>
286 <data format="tabular" name="summarized_mean" label="${tool.name} on ${on_string}: group_mean"> <!-- the three summary tables additionally require a loaded annotation file -->
287 <filter>tabular_annotation['load_annotation'] == 'yes_annotation' and 'mean' in tabular_annotation['summary_type']</filter>
288 </data>
289 <data format="tabular" name="summarized_median" label="${tool.name} on ${on_string}: group_median">
290 <filter>tabular_annotation['load_annotation'] == 'yes_annotation' and 'median' in tabular_annotation['summary_type']</filter>
291 </data>
292 <data format="tabular" name="summarized_sd" label="${tool.name} on ${on_string}: group_sd">
293 <filter>tabular_annotation['load_annotation'] == 'yes_annotation' and 'sd' in tabular_annotation['summary_type']</filter>
294 </data>
295 </outputs>
296 <tests>
297 <test expect_num_outputs="2"> <!-- intensity matrix and m/z feature table, no calibrants, no annotation -->
298 <expand macro="infile_imzml"/>
299 <param name="output_options" value="int_matrix,mz_tabular"/>
300 <output name="intensity_matrix" file="int_matrix1.tabular"/>
301 <output name="feature_output" file="features_out1.tabular"/>
302 </test>
303 <test expect_num_outputs="3"> <!-- pixel table with annotation column plus the mean and sd group summaries -->
304 <expand macro="infile_analyze75"/>
305 <param name="output_options" value="pixel_tabular"/>
306 <conditional name="tabular_annotation">
307 <param name="load_annotation" value="yes_annotation"/>
308 <param name="annotation_file" value="annotations.tabular"/>
309 <param name="column_x" value="1"/>
310 <param name="column_y" value="2"/>
311 <param name="column_names" value="4"/>
312 <param name="tabular_header" value="True"/>
313 <param name="summary_type" value="mean,sd"/>
314 </conditional>
315 <output name="pixel_output" file="pixel_out2.tabular"/>
316 <output name="summarized_mean" file="mean_out2.tabular"/>
317 <output name="summarized_sd" file="sd_out2.tabular"/>
318 </test>
319 <test expect_num_outputs="3"> <!-- all three basic outputs, pixel table extended by the input m/z count -->
320 <expand macro="infile_imzml"/>
321 <param name="output_options" value="int_matrix,pixel_tabular,mz_tabular"/>
322 <conditional name="counting_calibrants">
323 <param name="pixel_with_calibrants" value="yes_calibrants"/>
324 <param name="mz_tabular" value="inputcalibrantfile2.txt"/>
325 <param name="feature_column" value="1"/>
326 <param name="feature_header" value="False"/>
327 <param name="plusminus_ppm" value="200"/>
328 </conditional>
329 <output name="intensity_matrix" file="int_matrix3.tabular"/>
330 <output name="feature_output" file="features_out3.tabular"/>
331 <output name="pixel_output" file="pixel_out3.tabular"/>
332 </test>
333 </tests>
334 <help>
335 <![CDATA[
336
337 @CARDINAL_DESCRIPTION@
338
339 -----
340
341 This tool provides multiple tabular output options for mass spectrometry imaging data files.
342
343 @MSIDATA_INPUT_DESCRIPTION@
344
345 @SPECTRA_TABULAR_INPUT_DESCRIPTION@
346
347 @MZ_TABULAR_INPUT_DESCRIPTION@
348
349 **Output options**
350
351 - intensity matrix: m/z in rows, spectra in columns, filled with intensity values
352 - pixel output: spectra in rows - for each spectrum: name, x and y coordinates, order, number of peaks (intensities > 0), total ion current (TIC, sum of all intensities), m/z feature with the highest intensity per spectrum, optional count of input m/z per spectrum, optional spectrum annotation
353 - mz feature output: m/z in rows - for each m/z: name, m/z, mean, median, standard deviation (sd), standard error of the mean (sem), sum of all intensities per m/z, number of peaks (intensity > 0) per m/z
354 - summarized intensities: pixel annotations will be used to group spectra into annotation groups and calculate mean, median and sd of the intensities per group
355
356 ]]>
357 </help>
358 <expand macro="citations"/>
359 </tool>