Mercurial > repos > galaxyp > cardinal_quality_report
comparison quality_report.xml @ 0:5f18275c250a draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 0825a4ccd3ebf4ca8a298326d14f3e7b25ae8415
author | galaxyp |
---|---|
date | Mon, 01 Oct 2018 01:07:13 -0400 |
parents | |
children | ae9ffc7ba261 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5f18275c250a |
---|---|
1 <tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.0"> | |
2 <description> | |
3 mass spectrometry imaging QC | |
4 </description> | |
5 <macros> | |
6 <import>macros.xml</import> | |
7 </macros> | |
8 <expand macro="requirements"> | |
9 <requirement type="package" version="2.2.1">r-ggplot2</requirement> | |
10 <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement> | |
11 <requirement type="package" version="2.2.1">r-gridextra</requirement> | |
12 <requirement type="package" version="2.23_15">r-kernsmooth</requirement> | |
13 <requirement type="package" version="0.5.0">r-scales</requirement> | |
14 <requirement type="package" version="1.0.8">r-pheatmap</requirement> | |
15 </expand> | |
16 <command detect_errors="exit_code"> | |
17 <![CDATA[ | |
18 @INPUT_LINKING@ | |
19 cat '${cardinal_qualitycontrol_script}' && | |
20 Rscript '${cardinal_qualitycontrol_script}' | |
21 ]]> | |
22 </command> | |
23 <configfiles> | |
24 <configfile name="cardinal_qualitycontrol_script"><![CDATA[ | |
25 | |
26 ################################# load libraries and read file ################# | |
27 | |
28 library(Cardinal) | |
29 library(ggplot2) | |
30 library(RColorBrewer) | |
31 library(gridExtra) | |
32 library(KernSmooth) | |
33 library(scales) | |
34 library(pheatmap) | |
35 | |
36 @READING_MSIDATA@ | |
37 | |
38 ## remove duplicated coordinates | |
39 print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed")) | |
40 msidata <- msidata[,!duplicated(coord(msidata))] | |
41 | |
42 ## create full matrix to make processed imzML files compatible with segmentation and other steps | |
43 iData(msidata) <- iData(msidata)[] | |
44 | |
45 ## optional annotation from tabular file to obtain pixel groups (otherwise all pixels are considered to be one sample) | |
46 | |
47 #if str($tabular_annotation.load_annotation) == 'yes_annotation': | |
48 | |
49 ## read and extract x,y,annotation information | |
50 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) | |
51 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] | |
52 annotation_name = colnames(annotation_input)[3] ##extract header for annotations to later export tabular with same name | |
53 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation" | |
54 | |
55 ## merge with coordinate information of msidata | |
56 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) | |
57 colnames(msidata_coordinates)[3] = "pixel_index" | |
58 merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE) | |
59 merged_annotation[is.na(merged_annotation)] = "NA" | |
60 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] | |
61 msidata\$annotation = as.factor(merged_annotation[,4]) | |
62 | |
63 #end if | |
64 | |
65 ###################### calculation of data properties ################################ | |
66 @DATA_PROPERTIES@ | |
67 | |
68 ## Median intensities | |
69 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | |
70 ## Spectra multiplied with m/z (potential number of peaks) | |
71 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) | |
72 ## Percentage of intensities > 0 | |
73 percpeaks = round(npeaks/numpeaks*100, digits=2) | |
74 ## Number of empty TICs | |
75 TICs = colSums(spectra(msidata)[], na.rm=TRUE) | |
76 NumemptyTIC = sum(TICs == 0) | |
77 ## Median and sd of TIC | |
78 medTIC = round(median(TICs), digits=1) | |
79 sdTIC = round(sd(TICs), digits=0) | |
80 ## Median and sd # peaks per spectrum | |
81 medpeaks = round(median(colSums(spectra(msidata)[]>0, na.rm=TRUE), na.rm=TRUE), digits=0) | |
82 sdpeaks = round(sd(colSums(spectra(msidata)[]>0, na.rm=TRUE), na.rm=TRUE), digits=0) | |
83 print(cor(TICs,colSums(spectra(msidata)[]>0), method="pearson")) | |
84 ## Processing information | |
85 centroidedinfo = centroided(msidata) | |
86 | |
87 ############## Read and filter tabular file with m/z ########################### | |
88 | |
89 ### reading calibrant file: | |
90 | |
91 #if $calibrant_file: | |
92 | |
93 calibrant_list = read.delim("$calibrant_file", header = $calibrant_header, na.strings=c("","NA"), stringsAsFactors = FALSE) | |
94 calibrant_list = calibrant_list[,c($mz_column, $name_column)] | |
95 | |
96 ### calculate how many input calibrant m/z are valid: | |
97 | |
98 inputcalibrants = calibrant_list[calibrant_list[,1]>minmz & calibrant_list[,1]<maxmz,] | |
99 number_calibrants_in = length(calibrant_list[,1]) | |
100 number_calibrants_valid = length(inputcalibrants[,1]) | |
101 | |
102 #else | |
103 | |
104 inputcalibrants = as.data.frame(matrix(, nrow = 0, ncol = 2)) | |
105 number_calibrants_in = 0 | |
106 number_calibrants_valid = 0 | |
107 | |
108 #end if | |
109 | |
110 ## rename input dataframe and extract m/z | |
111 colnames(inputcalibrants) = c("m/z", "name") | |
112 inputcalibrantmasses = inputcalibrants[,1] | |
113 | |
114 | |
115 ######################################## PDF ############################################# | |
116 ########################################################################################## | |
117 ########################################################################################## | |
118 | |
119 pdf("qualitycontrol.pdf", fonts = "Times", pointsize = 12) | |
120 plot(0,type='n',axes=FALSE,ann=FALSE) | |
121 | |
122 ## if no filename is given, name of file in Galaxy history is used | |
123 | |
124 #if not $filename: | |
125 #set $filename = $infile.display_name | |
126 #end if | |
127 | |
128 title(main=paste("$filename")) | |
129 | |
130 ################# I) file properties in numbers ################################ | |
131 ################################################################################ | |
132 print("properties in numbers") | |
133 | |
134 properties2 = c("Median of intensities", | |
135 "Intensities > 0", | |
136 "Number of empty spectra", | |
137 "Median TIC ± sd", | |
138 "Median # peaks per spectrum ± sd", | |
139 "Centroided", | |
140 paste0("calibrants (#valid/#input) in \n", "$calibrant_file.display_name")) | |
141 | |
142 values2 = c(paste0(medint), | |
143 paste0(percpeaks, " %"), | |
144 paste0(NumemptyTIC), | |
145 paste0(medTIC, " ± ", sdTIC), | |
146 paste0(medpeaks, " ± ",sdpeaks), | |
147 paste0(centroidedinfo), | |
148 paste0(number_calibrants_valid, " / ", number_calibrants_in)) | |
149 | |
150 property_df2 = data.frame(properties2, values2) | |
151 colnames(property_df2) = c("properties", "values") | |
152 | |
153 property_df = rbind(property_df, property_df2) | |
154 | |
155 grid.table(property_df, rows= NULL) | |
156 | |
157 | |
158 ####################### II) x-y images ####################################### | |
159 ############################################################################## | |
160 print("x-y images") | |
161 | |
162 ## only do plots for file with intensity peaks | |
163 if (npeaks > 0){ | |
164 ## function for density plots: scatterplot of x2 against x1 in which every point gets a colour from a 256-step ramp, chosen via densCols; axis limits default to the min/max of the inputs | |
165 plot_colorByDensity = function(x1,x2, | |
166 ylim=c(min(x2),max(x2)), | |
167 xlim=c(min(x1),max(x1)), | |
168 xlab="",ylab="",main=""){ | |
169 df = data.frame(x1,x2) | |
170 x = densCols(x1,x2, colramp=colorRampPalette(c("black", "white"))) ## one black-to-white shade per point, computed by densCols | |
171 df\$dens = col2rgb(x)[1,] + 1L ## red channel of that shade (0-255) shifted by one to give an index 1-256 | |
172 cols = colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256) | |
173 df\$col = cols[df\$dens] ## look up the plotting colour of each point in the 256-colour ramp | |
174 plot(x2~x1, data=df[order(df\$dens),], ylim=ylim,xlim=xlim,pch=20,col=col, | |
175 cex=1,xlab=xlab,ylab=ylab,las=1, main=main)} | |
176 | |
177 | |
178 ################### 0) overview for combined data ########################### | |
179 | |
180 ### only for previously combined data, same plot as in combine QC pdf | |
181 | |
182 if (!is.null(levels(msidata\$annotation))){ | |
183 | |
184 number_combined = length(levels(msidata\$annotation)) | |
185 | |
186 position_df = cbind(coord(msidata)[,1:2], msidata\$annotation) | |
187 colnames(position_df)[3] = "annotation" | |
188 | |
189 combine_plot = ggplot(position_df, aes(x=x, y=y, fill=annotation))+ | |
190 geom_tile() + | |
191 coord_fixed()+ | |
192 ggtitle("Spatial orientation of pixel annotations")+ | |
193 theme_bw()+ | |
194 theme(text=element_text(family="ArialMT", face="bold", size=12))+ | |
195 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 8))+ | |
196 theme(legend.position="bottom",legend.direction="vertical")+ | |
197 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
198 | |
199 print(combine_plot) | |
200 | |
201 ## for each annotation group find the last pixel; vertical lines are drawn there in the plots over spectra index | |
202 pixel_name_df = data.frame(pixels(msidata), msidata\$annotation) | |
203 colnames(pixel_name_df) = c("pixel_number", "pixel_name") | |
204 last_pixel = aggregate(pixel_number~pixel_name, data = pixel_name_df, max) | |
205 pixel_vector = sort(last_pixel[,2]) ## aggregate orders its rows by group name, therefore sort by pixel index | |
206 abline_vector = head(pixel_vector, -1) ## drop the highest index: it is the end of the data and needs no separator | |
207 print(abline_vector) | |
208 } | |
209 | |
210 ################### 1) Pixel order image ################################### | |
211 | |
212 pixelnumber = 1:pixelcount | |
213 pixelxyarray=cbind(coord(msidata)[,1:2],pixelnumber) | |
214 gg_title = "Pixel order" | |
215 | |
216 print(ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber))+ | |
217 geom_tile() + coord_fixed()+ | |
218 ggtitle(gg_title) + theme_bw()+ | |
219 theme(plot.title = element_text(hjust = 0.5))+ | |
220 theme(text=element_text(family="ArialMT", face="bold", size=12))+ | |
221 scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), | |
222 space = "Lab", na.value = "black", name = "Pixel\nnumber")) | |
223 | |
224 ################ 2) Number of calibrants per spectrum ###################### | |
225 | |
226 ## matrix with calibrants in columns and in rows if there is peak intensity in range or not | |
227 pixelmatrix = matrix(ncol=ncol(msidata), nrow = 0) | |
228 | |
229 ## plot only possible when there is at least one valid calibrant | |
230 if (length(inputcalibrantmasses) != 0){ | |
231 | |
232 ## calculate plusminus values in m/z for each calibrant | |
233 plusminusvalues = rep($plusminus_ppm/1000000, length(inputcalibrantmasses))*inputcalibrantmasses | |
234 | |
235 ## filter for m/z window of each calibrant and calculate if sum of peak intensities > 0 | |
236 | |
237 for (mass in 1:length(inputcalibrantmasses)){ | |
238 | |
239 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] | |
240 | |
241 if (nrow(filtered_data) > 1 & sum(spectra(filtered_data)[],na.rm=TRUE) > 0){ | |
242 | |
243 ## intensity of all m/z > 0 | |
244 intensity_sum = colSums(spectra(filtered_data)[], na.rm=TRUE) > 0 | |
245 | |
246 }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data)[], na.rm=TRUE) > 0){ | |
247 | |
248 ## intensity of only m/z > 0 | |
249 intensity_sum = spectra(filtered_data)[] > 0 | |
250 | |
251 }else{ | |
252 | |
253 intensity_sum = rep(FALSE, ncol(filtered_data))} | |
254 | |
255 ## for each pixel add sum of intensities > 0 in the given m/z range | |
256 pixelmatrix = rbind(pixelmatrix, intensity_sum) | |
257 } | |
258 | |
259 ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE) | |
260 countvector= as.factor(colSums(pixelmatrix, na.rm=TRUE)) | |
261 countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts | |
262 mycolours = c("black","grey", "darkblue", "blue", "green" , "red", "yellow", "magenta", "olivedrab1", "lightseagreen") | |
263 | |
264 print(ggplot(countdf, aes(x=x, y=y, fill=countvector))+ | |
265 geom_tile() + coord_fixed() + | |
266 ggtitle(paste0("Number of calibrants per pixel (±",$plusminus_ppm, " ppm)")) + | |
267 theme_bw() + | |
268 theme(plot.title = element_text(hjust = 0.5))+ | |
269 theme(text=element_text(family="ArialMT", face="bold", size=12))+ | |
270 scale_fill_manual(values = mycolours[1:length(countvector)], | |
271 na.value = "black", name = "# calibrants")) | |
272 | |
273 }else{print("2) The inputcalibrant m/z were not provided or outside the m/z range")} | |
274 | |
275 ########################## 3) fold change image ########################### | |
276 | |
277 #if $calibrantratio: | |
278 #for $foldchanges in $calibrantratio: | |
279 mass1 = $foldchanges.mass1 | |
280 mass2 = $foldchanges.mass2 | |
281 distance = $foldchanges.distance | |
282 | |
283 ### if user did not write a label use input m/z as label | |
284 #if not str($foldchanges.filenameratioplot).strip(): | |
285 #set $label = "Fold change %s Da / %s Da" % ($foldchanges.mass1, $foldchanges.mass2) | |
286 #else: | |
287 #set $label = $foldchanges.filenameratioplot | |
288 #end if | |
289 | |
290 ### filter msidata for given m/z range (for both input m/z) | |
291 filtered_data1 = msidata[mz(msidata) >= mass1-distance & mz(msidata) <= mass1+distance,] | |
292 filtered_data2 = msidata[mz(msidata) >= mass2-distance & mz(msidata) <= mass2+distance,] | |
293 | |
294 ### find m/z in the two given ranges with the highest mean intensity | |
295 ### this two m/z will be used to calculate the fold change (red line in plot) | |
296 maxmassrow1 = rowMeans(spectra(filtered_data1), na.rm=TRUE) | |
297 maxmass1 = mz(filtered_data1)[which.max(maxmassrow1)] | |
298 maxmassrow2 = rowMeans(spectra(filtered_data2), na.rm=TRUE) | |
299 maxmass2 = mz(filtered_data2)[which.max(maxmassrow2)] | |
300 | |
301 ### plot legend: chosen value in blue, distance in blue, max m/z in red | |
302 ### m/z range for each plot (fixed range of 5 Da) | |
303 ### xlim does not work because it does not adjust for the max. intensities within the range | |
304 mzdown1 = features(msidata, mz = mass1-2) | |
305 mzup1 = features(msidata, mz = mass1+3) | |
306 mzdown2 = features(msidata, mz = mass2-2) | |
307 mzup2 = features(msidata, mz = mass2+3) | |
308 | |
309 ### plot for first m/z | |
310 par(mfrow=c(2,1), oma=c(0,0,2,0)) | |
311 plot(msidata[mzdown1:mzup1,], pixel = 1:pixelcount, main=paste0("Average spectrum ", mass1, " Da")) | |
312 abline(v=c(mass1-distance, mass1, mass1+distance), col="blue",lty=c(3,6,3)) | |
313 abline(v=maxmass1, col="red", lty=5) | |
314 | |
315 ### plot for second m/z | |
316 plot(msidata[mzdown2:mzup2,], pixel = 1:pixelcount, main= paste0("Average spectrum ", mass2, " Da")) | |
317 abline(v=c(mass2-distance, mass2, mass2+distance), col="blue", lty=c(3,6,3)) | |
318 abline(v=maxmass2, col="red", lty=5) | |
319 title("Control of fold change plot", outer=TRUE) | |
320 | |
321 ### filter spectra for max m/z to have two vectors, which can be divided | |
322 ### plot spatial distribution of fold change | |
323 ### only possible when there are intensities > 0 in both given m/z ranges | |
324 | |
325 if (length(maxmass1)>0&length(maxmass2)>0){ | |
326 mass1vector = spectra(msidata)[features(msidata, mz = maxmass1),] | |
327 mass2vector = spectra(msidata)[features(msidata, mz = maxmass2),] | |
328 foldchange= log2(mass1vector/mass2vector) | |
329 fcmatrix = cbind(foldchange, coord(msidata)[,1:2]) | |
330 ## x-y image of the log2 fold change; the plot title is the label set by the template above | |
331 print(ggplot(fcmatrix, aes(x=x, y=y, fill=foldchange))+ | |
332 geom_tile() + coord_fixed()+ | |
333 ggtitle("$label")+ | |
334 theme_bw()+ | |
335 theme(plot.title = element_text(hjust = 0.5))+ | |
336 theme(text=element_text(family="ArialMT", face="bold", size=12))+ | |
337 scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") | |
338 ,space = "Lab", na.value = "black", name ="FC")) | |
339 }else{ | |
340 plot(0,type='n',axes=FALSE,ann=FALSE) | |
341 title(main=paste("At least one m/z range did not contain any intensity > 0,\n therefore no foldchange plot could be drawn"))} | |
342 | |
343 #end for | |
344 #end if | |
345 | |
346 #################### 4) m/z heatmaps ####################################### | |
347 | |
348 par(mfrow=c(1,1), mar=c(5.1, 4.1, 4.1, 2.1), mgp=c(3, 1, 0), las=0) | |
349 if (length(inputcalibrants[,1]) != 0){ | |
350 for (mass in 1:length(inputcalibrants[,1])){ | |
351 | |
352 | |
353 image(msidata, mz=inputcalibrants[,1][mass], plusminus=plusminusvalues[mass], | |
354 main= paste0(inputcalibrants[,2][mass], ": ", round(inputcalibrants[,1][mass], digits = 2)," (±",$plusminus_ppm, " ppm)"), | |
355 contrast.enhance = "histogram", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy)) | |
356 } | |
357 } else {print("4) The input peptide and calibrant m/z were not provided or outside the m/z range")} | |
358 | |
359 #################### 5) Number of peaks per pixel - image ################## | |
360 | |
361 ## count peaks per pixel; every intensity value > 0 counts as a peak | |
362 peaksperpixel = colSums(spectra(msidata)[]> 0, na.rm=TRUE) | |
363 peakscoordarray=cbind(coord(msidata)[,1:2], peaksperpixel) | |
364 ## x-y image with the peak count of each pixel as fill colour | |
365 print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel))+ | |
366 geom_tile() + coord_fixed() + | |
367 ggtitle("Number of peaks per spectrum")+ | |
368 theme_bw() + | |
369 theme(plot.title = element_text(hjust = 0.5))+ | |
370 theme(text=element_text(family="ArialMT", face="bold", size=12))+ | |
371 scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") | |
372 ,space = "Lab", na.value = "black", name = "# peaks")) | |
373 | |
374 | |
375 ############################### 6) TIC image ############################### | |
376 | |
377 TICcoordarray=cbind(coord(msidata)[,1:2], TICs) | |
378 colo = colorRampPalette( | |
379 c("blue", "cyan", "green", "yellow","red")) | |
380 print(ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs), colour=colo)+ | |
381 geom_tile() + coord_fixed() + | |
382 ggtitle("Total Ion Chromatogram")+ | |
383 theme_bw() + | |
384 theme(plot.title = element_text(hjust = 0.5))+ | |
385 theme(text=element_text(family="ArialMT", face="bold", size=12))+ | |
386 scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") | |
387 ,space = "Lab", na.value = "black", name = "TIC")) | |
388 | |
389 | |
390 ############################### 7) Most abundant m/z image ################# | |
391 | |
392 highestmz = apply(spectra(msidata)[],2,which.max) | |
393 highestmz_matrix = cbind(coord(msidata)[,1:2],mz(msidata)[highestmz]) | |
394 colnames(highestmz_matrix)[3] = "highestmzinDa" | |
395 | |
396 print(ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa))+ | |
397 geom_tile() + coord_fixed() + | |
398 ggtitle("Most abundant m/z in each spectrum")+ | |
399 theme_bw() + | |
400 theme(plot.title = element_text(hjust = 0.5))+ | |
401 scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z", | |
402 limits=c(min(highestmz_matrix\$highestmzinDa), max(highestmz_matrix\$highestmzinDa)))+ | |
403 theme(text=element_text(family="ArialMT", face="bold", size=12))) | |
404 | |
405 | |
406 ########################## 8) optional pca image for two components ################# | |
407 | |
408 #if $do_pca: | |
409 | |
410 pca = PCA(msidata, ncomp=2) | |
411 par(mfrow = c(2,1)) | |
412 plot(pca, col=c("black", "darkgrey"), main="PCA for two components") | |
413 image(pca, col=c("black", "white"), strip=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy)) | |
414 | |
415 #end if | |
416 | |
417 ################## III) properties over spectra index ###################### | |
418 ############################################################################ | |
419 print("properties over pixels") | |
420 par(mfrow = c(2,1), mar=c(5,6,4,2)) | |
421 | |
422 ########################## 9) number of peaks per spectrum ################# | |
423 ## 9a) scatterplot | |
424 | |
425 plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="Number of peaks per spectrum") | |
426 title(xlab="Spectra index", line=3) | |
427 title(ylab="Number of peaks", line=4) | |
428 | |
429 if (!is.null(levels(msidata\$annotation))){ | |
430 abline(v=abline_vector, lty = 3)} | |
431 | |
432 ## 9b) histogram | |
433 | |
434 hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="") | |
435 title(main="Number of peaks per spectrum", line=2) | |
436 title(ylab="Frequency = # spectra", line=4) | |
437 abline(v=median(peaksperpixel), col="blue") | |
438 | |
439 ## 9c) additional histogram to show contribution of annotation groups | |
440 | |
441 if (!is.null(levels(msidata\$annotation))){ | |
442 | |
443 df_9 = data.frame(peaksperpixel, msidata\$annotation) | |
444 colnames(df_9) = c("Npeaks", "annotation") | |
445 | |
446 hist_9 = ggplot(df_9, aes(x=Npeaks, fill=annotation)) + | |
447 geom_histogram()+ theme_bw()+ | |
448 theme(text=element_text(family="ArialMT", face="bold", size=12))+ | |
449 theme(plot.title = element_text(hjust = 0.5))+ | |
450 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 8))+ | |
451 theme(legend.position="bottom",legend.direction="vertical")+ | |
452 labs(title="Number of peaks per spectrum and annotation group", x="Number of peaks per spectrum", y = "Frequency = # spectra") + | |
453 guides(fill=guide_legend(ncol=5,byrow=TRUE))+ | |
454 geom_vline(xintercept = median(peaksperpixel), size = 1, colour = "black",linetype = "dashed") | |
455 print(hist_9)} | |
456 | |
457 ########################## 10) TIC per spectrum ########################### | |
458 | |
459 ## 10a)density scatterplot | |
460 par(mfrow = c(2,1), mar=c(5,6,4,2)) | |
461 | |
462 plot_colorByDensity(pixels(msidata), TICs, ylab = "", xlab = "", main="TIC per spectrum") | |
463 title(xlab="Spectra index", line=3) | |
464 title(ylab = "Total ion chromatogram intensity", line=4) | |
465 if (!is.null(levels(msidata\$annotation))){ | |
466 abline(v=abline_vector, lty = 3)} | |
467 | |
468 ## 10b) histogram | |
469 hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="") | |
470 title(main= "TIC per spectrum", line=2) | |
471 title(ylab="Frequency = # spectra", line=4) | |
472 abline(v=median(log(TICs[TICs>0])), col="blue") | |
473 | |
474 ## 10c) additional histogram to show annotation contributions | |
475 if (!is.null(levels(msidata\$annotation))){ | |
476 df_10 = data.frame(log(TICs), msidata\$annotation) | |
477 colnames(df_10) = c("TICs", "annotation") | |
478 | |
479 hist_10 = ggplot(df_10, aes(x=TICs, fill=annotation)) + | |
480 geom_histogram()+ theme_bw()+ | |
481 theme(text=element_text(family="ArialMT", face="bold", size=12))+ | |
482 theme(plot.title = element_text(hjust = 0.5))+ | |
483 theme(legend.position="bottom",legend.direction="vertical")+ | |
484 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 8))+ | |
485 labs(title="TIC per spectrum and annotation group", x="log(TIC per spectrum)", y = "Frequency = # spectra") + | |
486 guides(fill=guide_legend(ncol=5,byrow=TRUE))+ | |
487 geom_vline(xintercept = median(log(TICs[TICs>0])), size = 1, colour = "black",linetype = "dashed") | |
488 print(hist_10)} | |
489 | |
490 ################################## IV) properties over m/z #################### | |
491 ############################################################################ | |
492 print("properties over m/z") | |
493 | |
494 ########################## 11) Histogram of m/z values ##################### | |
495 | |
496 par(mfrow = c(1, 1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1)) | |
497 hist(mz(msidata), xlab = "m/z", main="Histogram of m/z values") | |
498 | |
499 ########################## 12) Number of peaks per m/z ##################### | |
500 | |
501 peakspermz = rowSums(spectra(msidata)[] > 0, na.rm=TRUE) | |
502 print(median(peakspermz/pixelcount*100)) | |
503 | |
504 par(mfrow = c(2,1), mar=c(5,6,4,4.5)) | |
505 ## 12a) scatterplot | |
506 plot_colorByDensity(mz(msidata),peakspermz, main= "Number of peaks per m/z", ylab ="") | |
507 title(xlab="m/z", line=2.5) | |
508 title(ylab = "Number of peaks", line=4) | |
509 axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1) | |
510 mtext("Coverage of spectra [%]", 4, line=3, adj=1) | |
511 | |
512 ## 12b) histogram | |
513 hist(peakspermz, main="", las=1, ylab="", xlab="") | |
514 title(ylab = "Frequency", line=4) | |
515 title(main="Number of peaks per m/z", xlab = "Number of peaks per m/z", line=2) | |
516 abline(v=median(peakspermz), col="blue") | |
517 | |
518 ########################## 13) Sum of intensities per m/z ################## | |
519 | |
520 ## Sum of all intensities for each m/z (like TIC, but for m/z instead of pixel) | |
521 mzTIC = rowSums(spectra(msidata)[], na.rm=TRUE) ## calculate intensity sum for each m/z | |
522 | |
523 par(mfrow = c(2,1), mar=c(5,6,4,2)) | |
524 ## 13a) scatterplot | |
525 plot_colorByDensity(mz(msidata),mzTIC, main= "Sum of intensities per m/z", ylab ="") | |
526 title(xlab="m/z", line=2.5) | |
527 title(ylab="Intensity sum", line=4) | |
528 | |
529 ## 13b) histogram | |
530 hist(log(mzTIC), main="", xlab = "", las=1, ylab="") | |
531 title(main="Sum of intensities per m/z", line=2, ylab="") | |
532 title(xlab = "log (sum of intensities per m/z)") | |
533 title(ylab = "Frequency", line=4) | |
534 abline(v=median(log(mzTIC[mzTIC>0])), col="blue") | |
535 | |
536 ################################## V) intensity plots ######################## | |
537 ############################################################################ | |
538 print("intensity plots") | |
539 ########################## 14) Intensity distribution ###################### | |
540 | |
541 par(mfrow = c(2,1), mar=c(5,6,4,2)) | |
542 | |
543 ## 14a) Median intensity over spectra | |
544 | |
545 medianint_spectra = apply(spectra(msidata), 2, median, na.rm=TRUE) | |
546 plot(medianint_spectra, main="Median intensity per spectrum",las=1, xlab="Spectra index", ylab="") | |
547 title(ylab="Median spectrum intensity", line=4) | |
548 if (!is.null(levels(msidata\$annotation))){ | |
549 abline(v=abline_vector, lty = 3)} | |
550 | |
551 ## 14b) histogram: | |
552 hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1) | |
553 title(main="Log2-transformed intensities", line=2) | |
554 title(xlab="log2 intensities") | |
555 title(ylab="Frequency", line=4) | |
556 abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)]), na.rm=TRUE), col="blue") | |
557 | |
558 | |
559 ## 14c) histogram to show contribution of annotation groups | |
560 | |
561 if (!is.null(levels(msidata\$annotation))){ | |
562 | |
563 df_13 = data.frame(matrix(,ncol=2, nrow=0)) | |
564 for (subsample in levels(msidata\$annotation)){ | |
565 log2_int_subsample = log2(spectra(msidata)[,msidata\$annotation==subsample]) | |
566 df_subsample = data.frame(as.numeric(log2_int_subsample)) | |
567 df_subsample\$annotation = subsample | |
568 df_13 = rbind(df_13, df_subsample)} | |
569 df_13\$annotation = as.factor(df_13\$annotation) | |
570 colnames(df_13) = c("logint", "annotation") | |
571 ## stacked histogram of log2 intensities per annotation group; the dashed line marks the median of all log2 intensities > 0 (NA ignored, as in the histogram 14b) | |
572 hist_13 = ggplot(df_13, aes(x=logint, fill=annotation)) + | |
573 geom_histogram()+ theme_bw()+ | |
574 theme(text=element_text(family="ArialMT", face="bold", size=12))+ | |
575 labs(title="Log2-transformed intensities per sample", x="log2 intensities", y = "Frequency") + | |
576 theme(plot.title = element_text(hjust = 0.5))+ | |
577 theme(legend.position="bottom",legend.direction="vertical")+ | |
578 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 8))+ | |
579 guides(fill=guide_legend(ncol=5,byrow=TRUE))+ | |
580 geom_vline(xintercept = median(log2(spectra(msidata)[(spectra(msidata)>0)]), na.rm=TRUE), size = 1, colour = "black",linetype = "dashed") | |
581 print(hist_13) | |
582 | |
583 ## 14d) boxplots to visualize in a different way the intensity distributions | |
584 par(mfrow = c(1,1), cex.axis=1.3, cex.lab=1.3, mar=c(13.1,4.1,5.1,2.1)) | |
585 | |
586 mean_matrix = matrix(,ncol=0, nrow = nrow(msidata)) | |
587 for (subsample in levels(msidata\$annotation)){ | |
588 mean_mz_sample = rowMeans(spectra(msidata)[,msidata\$annotation==subsample],na.rm=TRUE) | |
589 mean_matrix = cbind(mean_matrix, mean_mz_sample)} | |
590 | |
591 boxplot(log2(mean_matrix), ylab = "log2 mean intensity per m/z", main="Mean m/z intensities per annotation group", xaxt = "n") | |
592 (axis(1, at = c(1:number_combined), labels=levels(msidata\$annotation), las=2)) | |
593 | |
594 ## 14e) Heatmap of pearson correlation on mean intensities between annotation groups | |
595 | |
596 corr_matrix = mean_matrix | |
597 corr_matrix[corr_matrix == 0] <- NA | |
598 colnames(corr_matrix) = levels(msidata\$annotation) | |
599 corr_matrix = cor(log2(corr_matrix), method= "pearson",use="complete.obs") | |
600 | |
601 heatmap.parameters <- list(corr_matrix, | |
602 show_rownames = T, show_colnames = T, | |
603 main = "Pearson correlation on mean intensities for each annotation group") | |
604 do.call("pheatmap", heatmap.parameters) | |
605 } | |
606 | |
607 ################################## VI) Mass spectra and m/z accuracy ######################## | |
608 ############################################################################ | |
609 print("Mass spectra and m/z accuracy") | |
610 | |
611 ############################ 15) Mass spectra ############################## | |
612 | |
613 ## find three random pixel to plot their spectra in the following plots: | |
614 pixel1 = sample(pixelnumber,1) | |
615 pixel2 = sample(pixelnumber,1) | |
616 pixel3 = sample(pixelnumber,1) | |
617 | |
618 ## replace any NA with 0, otherwise plot function will not work at all | |
619 msidata_no_NA = msidata | |
620 spectra(msidata_no_NA)[is.na(spectra(msidata_no_NA)[])] = 0 | |
621 | |
622 par(mfrow = c(2, 2), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1)) | |
623 plot(msidata_no_NA, pixel = 1:ncol(msidata_no_NA), main= "Average spectrum") | |
624 plot(msidata_no_NA, pixel = pixel1, main=paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel1,1:2]))) | |
625 plot(msidata_no_NA, pixel = pixel2, main= paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel2,1:2]))) | |
626 plot(msidata_no_NA, pixel = pixel3, main= paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel3,1:2]))) | |
627 | |
628 ################### 16) Zoomed in mass spectra for calibrants ############## | |
629 | |
630 count = 1 | |
631 differencevector = numeric() | |
632 differencevector2 = vector() | |
633 | |
634 if (length(inputcalibrantmasses) != 0){ | |
635 | |
636 ### calculate plusminus values in m/z for each calibrant, this is used for all following plots | |
637 plusminusvalues = rep($plusminus_ppm/1000000, length(inputcalibrantmasses)) * inputcalibrantmasses | |
638 | |
639 for (mass in 1:length(inputcalibrantmasses)){ | |
640 | |
641 ### define the plot window with xmin and xmax | |
642 minmasspixel = features(msidata_no_NA, mz=inputcalibrantmasses[mass]-1) | |
643 maxmasspixel = features(msidata_no_NA, mz=inputcalibrantmasses[mass]+3) | |
644 | |
645 ### find m/z with the highest mean intensity in m/z range (red line in plot 16) and calculate ppm difference for plot 17 | |
646 filtered_data = msidata_no_NA[mz(msidata_no_NA) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata_no_NA) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] | |
647 | |
648 if (nrow(filtered_data) > 0 & sum(spectra(filtered_data)) > 0){ | |
649 maxmassrow = rowMeans(spectra(filtered_data)) ## for each m/z average intensity is calculated | |
650 maxvalue = mz(filtered_data)[which.max(maxmassrow)] ### m/z with highest average intensity in m/z range | |
651 mzdifference = maxvalue - inputcalibrantmasses[mass] ### difference: m/z with highest average intensity - theoretical (input) m/z | |
652 ppmdifference = mzdifference/inputcalibrantmasses[mass]*1000000 ### calculate ppm for accuracy measurement | |
653 }else{ | |
654 ppmdifference = NA ## no m/z feature or no intensity in the calibrant window: nothing to compare | |
655 maxvalue = NA} | |
656 differencevector[mass] = round(ppmdifference, digits=2) | |
657 | |
658 ### find m/z closest to inputcalibrant and calculate ppm difference for plot 18 | |
659 mznumber = features(msidata_no_NA, mz = inputcalibrantmasses[mass]) ### gives featurenumber which is closest to given m/z | |
660 mzvalue = mz(msidata_no_NA)[mznumber] ### gives closest m/z | |
661 mzdifference2 = mzvalue - inputcalibrantmasses[mass] | |
662 ppmdifference2 = mzdifference2/inputcalibrantmasses[mass]*1000000 | |
663 differencevector2[mass] = round(ppmdifference2, digits=2) | |
664 | |
665 par(mfrow = c(2, 2), oma=c(0,0,2,0)) | |
666 plot(msidata_no_NA[minmasspixel:maxmasspixel,], pixel = 1:length(pixelnumber), main= "Average spectrum") | |
667 abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,5,3)) | |
668 abline(v=c(maxvalue), col="red", lty=2) | |
669 abline(v=c(mzvalue), col="green2", lty=4) | |
670 plot(msidata_no_NA[minmasspixel:maxmasspixel,], pixel = pixel1, main=paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel1,1:2]))) | |
671 abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,5,3)) | |
672 abline(v=c(maxvalue), col="red", lty=2) | |
673 abline(v=c(mzvalue), col="green2", lty=4) | |
674 plot(msidata_no_NA[minmasspixel:maxmasspixel,], pixel = pixel2, main= paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel2,1:2]))) | |
675 abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,5,3)) | |
676 abline(v=c(maxvalue), col="red", lty=2) | |
677 abline(v=c(mzvalue), col="green2", lty=4) | |
678 plot(msidata_no_NA[minmasspixel:maxmasspixel,], pixel = pixel3, main= paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel3,1:2]))) | |
679 abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,5,3)) | |
680 abline(v=c(maxvalue), col="red", lty=2) | |
681 abline(v=c(mzvalue), col="green2", lty=4) | |
682 title(paste0("theor. m/z: ", round(inputcalibrants[count,1], digits=4)), col.main="blue", outer=TRUE, line=0, adj=0.074) | |
683 title(paste0("most abundant m/z: ", round(maxvalue, digits=4)), col.main="red", outer=TRUE, line=0, adj=0.49) | |
684 title(paste0("closest m/z: ", round(mzvalue, digits=4)), col.main="green2", outer=TRUE, line=0, adj=0.93) | |
685 | |
686 ### 16b) one large extra plot with different colours for different pixel annotation groups | |
687 | |
688 if (!is.null(levels(msidata\$annotation))){ | |
689 if (number_combined < 10){ | |
690 key_zoomed = TRUE | |
691 }else{key_zoomed = FALSE} | |
692 par(mfrow = c(1, 1)) | |
693 plot(msidata_no_NA[minmasspixel:maxmasspixel,], pixel=1:ncol(msidata_no_NA),main="Average spectrum per annotation group", | |
694 pixel.groups=msidata\$annotation, key=key_zoomed, col=hue_pal()(number_combined),superpose=TRUE) | |
695 abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="black", lty=c(3,1,3)) | |
696 } | |
697 count=count+1 | |
698 } | |
699 | |
700 ######### 17) ppm difference input calibrant m/z and m/z with max intensity in given m/z range######### | |
701 | |
702 ### bar plot of the ppm differences collected in differencevector: m/z with the highest average intensity vs. theor. calibrant m/z | |
703 par(mfrow = c(1,1)) | |
704 | |
705 ppm_error_table = data.frame(ppm_error = differencevector, calibrant = as.character(inputcalibrants[,2])) | |
706 | |
707 ### without a single ppm value only an empty page with a hint is drawn | |
708 if (all(is.na(ppm_error_table[["ppm_error"]]))){ | |
709 plot(0,type='n',axes=FALSE,ann=FALSE) | |
710 title(main="plot 17: no peaks in the chosen region, repeat with higher ppm range") | |
711 }else{ | |
712 | |
713 ppm_error_plot = ggplot(data=ppm_error_table, aes(x=calibrant, y=ppm_error)) + | |
714 geom_bar(stat="identity", fill = "darkgray") + | |
715 geom_text(aes(label=ppm_error), vjust=-0.3, size=5.5, col="blue") + | |
716 labs(title="Average m/z error (max. average intensity vs. theor. calibrant m/z)", x="calibrants", y = "Average m/z error in ppm") + | |
717 theme_minimal() + | |
718 theme(plot.title = element_text(hjust = 0.5, size=14), text=element_text(family="ArialMT", face="bold", size=14), axis.text.x = element_text(angle = 90, hjust = 1, size=14)) | |
719 print(ppm_error_plot) | |
720 } | |
721 | |
722 ######### 18) ppm difference input calibrant m/z and closest m/z ########### | |
723 | |
724 ### bar plot of the ppm differences collected in differencevector2: closest measured m/z vs. theor. calibrant m/z | |
725 differencevector2 = round(differencevector2, digits=2) | |
726 closest_mz_table = data.frame(ppm_error = differencevector2, calibrant = as.character(inputcalibrants[,2])) | |
727 | |
728 ### title position, font and rotated calibrant names on the x-axis | |
729 closest_mz_theme = theme(plot.title = element_text(hjust = 0.5, size=14), text=element_text(family="ArialMT", face="bold", size=14), axis.text.x = element_text(angle = 90, hjust = 1, size=14)) | |
730 | |
731 closest_mz_plot = ggplot(data=closest_mz_table, aes(x=calibrant, y=ppm_error)) + | |
732 geom_bar(stat="identity", fill = "darkgray") + | |
733 geom_text(aes(label=ppm_error), vjust=-0.3, size=5.5, col="blue") + | |
734 labs(title="Average m/z error (closest measured m/z vs. theor. calibrant m/z)", x="calibrants", y = "Average m/z error in ppm") + | |
735 theme_minimal() + closest_mz_theme | |
736 print(closest_mz_plot) | |
737 | |
738 #################### 19) ppm difference over pixels ##################### | |
739 | |
740 par(mfrow = c(1,1)) | |
741 mycolours = c("darkgrey", "darkblue", "blue", "green" , "red", "orange", "yellow", "magenta", "olivedrab1", "lightseagreen") | |
742 count = 1 | |
743 ppm_df = as.data.frame(matrix(,ncol=0, nrow = ncol(msidata))) | |
744 for (calibrant in inputcalibrantmasses){ | |
745 ### restrict the data to the ppm window of the calibrant, if no m/z is in the window, ppm differences for this calibrant will be NA | |
746 filtered_data = msidata[mz(msidata) >= calibrant-plusminusvalues[count] & mz(msidata) <= calibrant+plusminusvalues[count],] | |
747 | |
748 ### NA is the default for every spectrum; it stays for spectra without a peak in the window and is not shown in the plot | |
749 ppm_vector = rep(NA_real_, ncol(msidata)) | |
750 if (nrow(filtered_data) > 0){ | |
751 window_intensities = spectra(filtered_data) | |
752 for (pixel_count in seq_len(ncol(filtered_data))){ | |
753 pixel_intensities = window_intensities[,pixel_count] | |
754 measured = !is.na(pixel_intensities) | |
755 | |
756 ### NA intensities are excluded before max() is checked, otherwise the if condition fails with an error | |
757 if (any(measured) && max(pixel_intensities[measured]) != 0){ | |
758 ### m/z of the highest peak in the window of this spectrum and its difference to the calibrant in ppm | |
759 mz_max = mz(filtered_data)[which.max(pixel_intensities)] | |
760 ppm_vector[pixel_count] = (mz_max - calibrant)/calibrant*1000000} | |
761 } | |
762 } | |
763 | |
764 ppm_df = cbind(ppm_df, ppm_vector) | |
765 count=count+1 | |
766 } | |
767 | |
768 if (sum(is.na(ppm_df)) == ncol(ppm_df)*nrow(ppm_df)){ | |
769 plot(0,type='n',axes=FALSE,ann=FALSE) | |
770 title(main=paste("plot 19: no peaks in the chosen region, repeat with higher ppm range")) | |
771 }else{ | |
772 | |
773 ### plot ppm differences over pixels (spectra index), colours are recycled if there are more calibrants than colours | |
774 par(mar=c(4.1, 4.1, 4.1, 7.5)) | |
775 calibrant_colours = rep_len(mycolours, ncol(ppm_df)) | |
776 | |
777 plot(0,0,type="n", ylim=c(min(ppm_df, na.rm=TRUE),max(ppm_df, na.rm=TRUE)), xlim = c(1,nrow(ppm_df)),xlab = "Spectra index", ylab = "m/z difference in ppm", main="Difference m/z with max. average intensity vs. theor. m/z\n(per spectrum)") | |
778 for (each_cal in 1:ncol(ppm_df)){ | |
779 lines(ppm_df[,each_cal], col=calibrant_colours[each_cal], type="p")} | |
780 ### legend with point symbols, because the ppm differences are drawn as points | |
781 legend("topright", inset=c(-0.25,0), xpd = TRUE, bty="n", legend=inputcalibrantmasses, col=calibrant_colours, pch=1) | |
782 if (!is.null(levels(msidata\$annotation))){ | |
783 abline(v=abline_vector, lty = 3)}} | |
784 | |
785 }else{print("plot 16+17+18+19) The inputcalibrant m/z were not provided or outside the m/z range")} ## else branch of: if (length(inputcalibrantmasses) != 0) | |
786 }else{ ## NOTE(review): else branch of an if that is opened before this part of the script, presumably the check for intensities > 0 - confirm | |
787 print("inputfile has no intensities > 0") | |
788 } | |
789 dev.off() ## closes the current graphics device, presumably the pdf device of the report that is opened earlier in the script | |
790 | |
791 | |
792 ]]></configfile> | |
793 </configfiles> | |
794 <inputs> <!-- data input, optional pixel annotation, calibrant m/z table and plot options of the QC report --> | |
795 <expand macro="reading_msidata"/> | |
796 <conditional name="tabular_annotation"> | |
797 <param name="load_annotation" type="select" label="Use pixel annotation from tabular file for QC plots"> | |
798 <option value="no_annotation" selected="True">pixels belong into one group only</option> | |
799 <option value="yes_annotation">use pixel annotation from a tabular file</option> | |
800 </param> | |
801 <when value="yes_annotation"> | |
802 <expand macro="reading_pixel_annotations"/> | |
803 </when> | |
804 <when value="no_annotation"/> | |
805 </conditional> | |
806 <expand macro="pdf_filename"/> | |
807 <expand macro="reading_2_column_mz_tabular"/> | |
808 <param name="plusminus_ppm" value="200" type="float" min="0" label="ppm range" help="Will be added in both directions to input calibrant m/z"/> <!-- min="0": a negative ppm range would invert the calibrant m/z window --> | |
809 <param name="do_pca" type="boolean" label="PCA with 2 components"/> | |
810 <repeat name="calibrantratio" title="Plot fold change of two m/z" min="0" max="10"> | |
811 <param name="mass1" value="1111" type="float" label="M/z 1" help="First m/z"/> | |
812 <param name="mass2" value="2222" type="float" label="M/z 2" help="Second m/z"/> | |
813 <param name="distance" value="0.25" type="float" min="0" label="M/z range" help="Plusminus m/z window added to input m/z. In both m/z ranges the maximum intensity is used to calculate the fold change"/> <!-- min="0": the window is added in both directions and must not be negative --> | |
814 <param name="filenameratioplot" type="text" optional="true" label="Title" help="Optional title for fold change plot."> | |
815 <sanitizer invalid_char=""> | |
816 <valid initial="string.ascii_letters,string.digits"> | |
817 <add value="_" /> | |
818 </valid> | |
819 </sanitizer> | |
820 </param> | |
821 </repeat> | |
822 <param name="pixel_output" type="boolean" label="Tabular output with spectra information"/> | |
823 </inputs> | |
824 <outputs> <!-- single output: the pdf report, collected from qualitycontrol.pdf in the job working directory --> | |
825 <data name="QC_report" format="pdf" label="${tool.name} on ${on_string}" from_work_dir="qualitycontrol.pdf"/> | |
826 </outputs> | |
827 <tests> <!-- all four tests compare the generated pdf report with the expected file by size only (compare="sim_size") --> | |
828 <test> <!-- test 1: infile_imzml macro input in processed mode, calibrant file 1, ppm range 100, PCA and one fold change plot --> | |
829 <expand macro="infile_imzml"/> | |
830 <conditional name="processed_cond"> | |
831 <param name="processed_file" value="processed"/> | |
832 <param name="accuracy" value="200"/> | |
833 <param name="units" value="ppm"/> | |
834 </conditional> | |
835 <conditional name="tabular_annotation"> | |
836 <param name="load_annotation" value="no_annotation"/> | |
837 </conditional> | |
838 <param name="calibrant_file" value="inputcalibrantfile1.tabular" ftype="tabular"/> | |
839 <param name="mz_column" value="1"/> | |
840 <param name="name_column" value="1"/> | |
841 <param name="plusminus_ppm" value="100"/> | |
842 <param name="filename" value="Testfile_imzml"/> | |
843 <param name="do_pca" value="True"/> | |
844 <repeat name="calibrantratio"> | |
845 <param name="mass1" value="328.9"/> | |
846 <param name="mass2" value="398.8"/> | |
847 <param name="distance" value="0.25"/> | |
848 <param name="filenameratioplot" value = "Ratio of mass1 (328.9) / mass2 (398.8)"/> | |
849 </repeat> | |
850 <output name="QC_report" file="QC_imzml.pdf" compare="sim_size"/> | |
851 </test> | |
852 | |
853 <test> <!-- test 2: infile_analyze75 macro input, calibrant file 2 with separate name column, default ppm range, PCA --> | |
854 <expand macro="infile_analyze75"/> | |
855 <conditional name="tabular_annotation"> | |
856 <param name="load_annotation" value="no_annotation"/> | |
857 </conditional> | |
858 <param name="calibrant_file" value="inputcalibrantfile2.txt"/> | |
859 <param name="mz_column" value="1"/> | |
860 <param name="name_column" value="2"/> | |
861 <param name="filename" value="Testfile_analyze75"/> | |
862 <param name="do_pca" value="True"/> | |
863 <output name="QC_report" file="QC_analyze75.pdf" compare="sim_size"/> | |
864 </test> | |
865 | |
866 <test> <!-- test 3: rdata input with pixel annotations from a tabular file with header, calibrant file 1, ppm range 100, PCA --> | |
867 <param name="infile" value="3_files_combined.RData" ftype="rdata"/> | |
868 <conditional name="tabular_annotation"> | |
869 <param name="load_annotation" value="yes_annotation"/> | |
870 <param name="annotation_file" value="annotations_rdata.tabular"/> | |
871 <param name="column_x" value="1"/> | |
872 <param name="column_y" value="2"/> | |
873 <param name="column_names" value="3"/> | |
874 <param name="tabular_header" value="True"/> | |
875 </conditional> | |
876 <param name="calibrant_file" value="inputcalibrantfile1.tabular" ftype="tabular"/> | |
877 <param name="mz_column" value="1"/> | |
878 <param name="name_column" value="1"/> | |
879 <param name="plusminus_ppm" value="100"/> | |
880 <param name="filename" value="Testfile_rdata"/> | |
881 <param name="do_pca" value="True"/> | |
882 <output name="QC_report" file="QC_rdata.pdf" compare="sim_size"/> | |
883 </test> | |
884 <test> <!-- test 4: rdata input empty_spectra.rdata without annotation and without PCA, expected report QC_empty_spectra.pdf --> | |
885 <param name="infile" value="empty_spectra.rdata" ftype="rdata"/> | |
886 <conditional name="tabular_annotation"> | |
887 <param name="load_annotation" value="no_annotation"/> | |
888 </conditional> | |
889 <param name="calibrant_file" value="inputcalibrantfile2.txt"/> | |
890 <param name="mz_column" value="1"/> | |
891 <param name="name_column" value="2"/> | |
892 <param name="filename" value="Testfile_rdata"/> | |
893 <param name="do_pca" value="False"/> | |
894 <output name="QC_report" file="QC_empty_spectra.pdf" compare="sim_size"/> | |
895 </test> | |
896 </tests> | |
897 <help> | |
898 <![CDATA[ | |
899 @CARDINAL_DESCRIPTION@ | |
900 | |
901 ----- | |
902 | |
903 This tool uses Cardinal to read files and create a quality control report with descriptive plots for mass spectrometry imaging data. | |
904 | |
905 @MSIDATA_INPUT_DESCRIPTION@ | |
906 - Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates which will be automatically removed before the tool's analysis starts. | |
907 @SPECTRA_TABULAR_INPUT_DESCRIPTION@ | |
908 | |
909 @MZ_2COLS_TABULAR_INPUT_DESCRIPTION@ | |
910 | |
911 **Options** | |
912 | |
913 - m/z of interest (e.g. internal calibrants) and the ppm range are used for m/z heatmaps (x-y grid), heatmap of number of calibrants per spectrum (x-y grid), zoomed in mass spectra, m/z accuracy plots | |
914 - Optional fold change plot: draws a heatmap (x-y grid) for the fold change of two m/z (log2(intensity ratio)) | |
915 - All plots are described in more detail below | |
916 | |
917 **Tip** | |
918 | |
919 - For additional m/z heatmaps use the MSI mz images tool and to plot more mass spectra use the MSI mass spectra tool. | |
920 - To obtain the underlying spectra and feature values used in this quality report, the imzML exporter tool can be used | |
921 | |
922 | |
923 **Output** | |
924 | |
925 - quality control report as pdf with key numbers and descriptive plots describing the mass spectrometry imaging data | |
926 | |
927 | |
928 ---------------------------------------------------------------------------------------------------------------------------------------------------- | |
929 | |
930 **Overview of the QC report plots** | |
931 | |
932 - (annot): these plots will only be drawn if pixel annotations are loaded via a tabular file | |
933 - (cal): these plots will only be drawn if a tabular file with at least one valid calibrant m/z is provided | |
934 - (FC): these plots will only be drawn if the optional fold change image is selected | |
935 - Vertical lines in histograms represent median values. In density scatter plots the colour changes from blue to green, yellow and red the more points are overlaid. | |
936 | |
937 - Overview of file properties: Numbers and ranges for m/z features and pixels are given. Median and range across all intensity values are provided. Intensities > 0 gives the percentage of m/z-pixel pairs with an intensity above zero. The number of empty spectra (TIC = 0), the median number of peaks (intensities > 0) per spectrum as well as the median TIC (total ion chromatogram) are given. The processing status of the file is provided as well as the number of valid calibrants from the provided tabular file. | |
938 | |
939 **x-y images (pixel/spectra information)** | |
940 | |
941 - (annot) Spatial orientation of annotated pixel: All pixels of one annotation group have the same colour. | |
942 - Pixel order: Shows the order of the pixels in the provided file. Depending on the instrument this can represent the acquisition order. If annotation file is provided pixels are ordered according to annotation groups. | |
943 - (cal) Number of calibrants per pixel: In every spectrum the calibrant m/z window (calibrant m/z plusminus 'ppm range') is searched for peaks (intensity > 0). Calibrants are considered present in a spectrum when they have at least one peak in their m/z window. | |
944 - (FC) Control of fold change plot: For both input m/z a zoomed in average spectrum is drawn with the input m/z as blue dashed line, the m/z range as blue dotted lines and the maximum intensity in the m/z window with a red line. | |
945 - (FC) Fold change image: For each spectrum the intensities of the two optimal m/z features (red lines in control plots) are divided and log2 transformed to obtain the fold change, which is then plotted as a heatmap. | |
946 - (cal) Intensity heatmaps for the m/z value that is closest to the calibrant m/z. The intensities are averaged within the calibrant m/z window (ppm range). | |
947 - Number of peaks per spectrum: For each spectrum the number of m/z values with intensity > 0 is calculated and plotted as heatmap. | |
948 - Total ion chromatogram: For each spectrum all intensities are summed up to obtain the TIC which is plotted as heatmap. | |
949 - Most abundant m/z in each spectrum: For each spectrum the m/z value with the highest intensity is plotted. | |
950 - PCA for two components: Result of a principal component analysis (PCA) for two components is given. The loading plot depicts the contribution of each m/z value and the x-y image represents the differences between the pixels. | |
951 | |
952 **Properties over spectra/pixels** | |
953 | |
954 - Number of peaks per spectrum: Scatter plot and histogram showing the number of intensities > 0 for each spectrum. If annotation tabular file is provided, the pixels are sorted according to annotation groups and the dotted lines in the scatter plot separate spectra of different annotation groups. | |
955 - (annot) Number of peaks per spectrum and annotation group: Same histogram as in plot before but with colours to show the contribution of each pixel annotation group. | |
956 - TIC per spectrum: Scatter plot and histogram showing the sum of all intensities per spectrum (TIC). Dotted lines in the scatter plot separate spectra of different annotation groups. | |
957 - (annot) TIC per spectrum and annotation group: Same histogram as in plot before but with colours to show the contribution of each pixel annotation group. | |
958 | |
959 **Properties over m/z features** | |
960 | |
961 - Histogram of m/z values: Histogram of all m/z values (complete m/z axis) | |
962 - Number of peaks per m/z: Scatter plot and histogram giving the number of intensities > 0 for each m/z. | |
963 - Sum of intensities per m/z: Scatter plot and histogram of the sum of all intensities per m/z. | |
964 | |
965 **Intensity plots** | |
966 | |
967 - Median intensity per spectrum: Scatter plot in which each point represents the median intensity for one spectrum. Dotted lines in the scatter plot separate spectra of different annotation groups. | |
968 - Log2-transformed intensities: Histogram of log2-transformed intensities. | |
969 - (annot) log2-transformed intensities per annotation group: Same histogram as before but with colours to show the contribution of each pixel annotation group. | |
970 - (annot) Mean intensities per m/z and annotation group: For all pixels of an annotation group the mean intensity for each m/z is calculated and shown as boxplot. | |
971 - (annot) Pearson correlation between annotation groups based on mean intensities and shown as heatmap. | |
972 | |
973 **Mass spectra and m/z accuracy** | |
974 | |
975 - Mass spectra over the full m/z range: First plot shows the average intensities over all spectra. The other three mass spectra are from random individual pixels (spectra). | |
976 - (cal) For each calibrant four zoomed in mass spectra are drawn: The first shows the average intensities over all spectra and the other three are single mass spectra. The theoretical calibrant m/z (taken from the input file) is represented by the dashed blue line. The dotted blue lines show the given ppm range. The green line is the m/z value that is closest to the theoretical calibrant and the red line is the m/z with the highest average intensity in the m/z window. | |
977 - (annot) Average spectrum per annotation group: For each calibrant a zoomed in mass spectrum is plotted this time with the average intensities for each annotation group separately. | |
978 - (cal) Difference m/z with max. average intensity vs. theor. calibrant m/z: The difference in ppm between the m/z with the highest average intensity and the theoretical m/z is plotted for each calibrant. This corresponds to the difference between the dashed blue line and the red line in the zoomed in mass spectra. | |
979 - (cal) Difference closest measured m/z vs. theor. calibrant m/z: The difference in ppm between the closest m/z value and the theoretical m/z value is plotted for each calibrant. This corresponds to the difference between the dashed blue line and the green line in the zoomed in mass spectra. | |
980 - (cal) Difference m/z with max. average intensity vs. theor. m/z (per spectrum): For each spectrum the ppm difference between the m/z with the highest intensity in the calibrant m/z window of this spectrum and the theoretical m/z is plotted. Spectra without a peak in the calibrant m/z window are not shown. The calibrants have different plotting colours. Dotted lines separate spectra of different annotation groups. | |
981 | |
982 | |
983 ]]> | |
984 </help> | |
985 <expand macro="citations"/> | |
986 </tool> | |
987 |