comparison quality_report.xml @ 4:3b7a35d50ebf draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit ecdc3a64aa245d80dbc5487b2bf10a85a43adc6d
author galaxyp
date Fri, 22 Mar 2019 08:11:43 -0400
parents 16556ca0196b
children f0d1f3e97303
comparison
equal deleted inserted replaced
3:16556ca0196b 4:3b7a35d50ebf
1 <tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.2"> 1 <tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.3">
2 <description> 2 <description>
3 mass spectrometry imaging QC 3 mass spectrometry imaging QC
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>macros.xml</import> 6 <import>macros.xml</import>
34 library(pheatmap) 34 library(pheatmap)
35 35
36 36
37 @READING_MSIDATA_INRAM@ 37 @READING_MSIDATA_INRAM@
38 38
39 ## create full matrix to make processed imzML files compatible with segmentation and other steps
40 iData(msidata) <- iData(msidata)[]
41
42 ## remove duplicated coordinates 39 ## remove duplicated coordinates
43 print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed"))
44 msidata <- msidata[,!duplicated(coord(msidata))] 40 msidata <- msidata[,!duplicated(coord(msidata))]
45 41
46 ## optional annotation from tabular file to obtain pixel groups (otherwise all pixels are considered to be one sample) 42 ## optional annotation from tabular file to obtain pixel groups (otherwise all pixels are considered to be one sample)
47 43
48 #if str($tabular_annotation.load_annotation) == 'yes_annotation': 44 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
65 61
66 ###################### calculation of data properties ################################ 62 ###################### calculation of data properties ################################
67 @DATA_PROPERTIES_INRAM@ 63 @DATA_PROPERTIES_INRAM@
68 64
69 ## Median intensities 65 ## Median intensities
70 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) 66 medint = round(median(spectra(msidata), na.rm=TRUE), digits=2)
71 ## Spectra multiplied with m/z (potential number of peaks) 67 ## Spectra multiplied with m/z (potential number of peaks)
72 numpeaks = ncol(msidata)*nrow(msidata) 68 numpeaks = ncol(msidata)*nrow(msidata)
73 ## Percentage of intensities > 0 69 ## Percentage of intensities > 0
74 percpeaks = round(npeaks/numpeaks*100, digits=2) 70 percpeaks = round(npeaks/numpeaks*100, digits=2)
75 ## Number of empty TICs 71 ## Number of empty TICs
76 TICs = colSums(spectra(msidata)[], na.rm=TRUE) 72 TICs = colSums(spectra(msidata), na.rm=TRUE)
77 NumemptyTIC = sum(TICs == 0) 73 NumemptyTIC = sum(TICs == 0)
78 ## Median und sd TIC 74 ## Median und sd TIC
79 medTIC = round(median(TICs), digits=1) 75 medTIC = round(median(TICs), digits=1)
80 sdTIC = round(sd(TICs), digits=0) 76 sdTIC = round(sd(TICs), digits=0)
81 ## Median and sd # peaks per spectrum 77 ## Median and sd # peaks per spectrum
82 medpeaks = round(median(colSums(spectra(msidata)[]>0, na.rm=TRUE), na.rm=TRUE), digits=0) 78 medpeaks = round(median(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0)
83 sdpeaks = round(sd(colSums(spectra(msidata)[]>0, na.rm=TRUE), na.rm=TRUE), digits=0) 79 sdpeaks = round(sd(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0)
84 ## Processing informations 80 ## Processing informations
85 centroidedinfo = centroided(msidata) 81 centroidedinfo = centroided(msidata)
86 82
87 ############## Read and filter tabular file with m/z ########################### 83 ############## Read and filter tabular file with m/z ###########################
88 84
245 241
246 for (mass in 1:length(inputcalibrantmasses)){ 242 for (mass in 1:length(inputcalibrantmasses)){
247 243
248 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] 244 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
249 245
250 if (nrow(filtered_data) > 1 & sum(spectra(filtered_data)[],na.rm=TRUE) > 0){ 246 if (nrow(filtered_data) > 1 & sum(spectra(filtered_data),na.rm=TRUE) > 0){
251 247
252 ## intensity of all m/z > 0 248 ## intensity of all m/z > 0
253 intensity_sum = colSums(spectra(filtered_data)[], na.rm=TRUE) > 0 249 intensity_sum = colSums(spectra(filtered_data), na.rm=TRUE) > 0
254 250
255 }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data)[], na.rm=TRUE) > 0){ 251 }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data), na.rm=TRUE) > 0){
256 252
257 ## intensity of only m/z > 0 253 ## intensity of only m/z > 0
258 intensity_sum = spectra(filtered_data)[] > 0 254 intensity_sum = spectra(filtered_data) > 0
259 255
260 }else{ 256 }else{
261 257
262 intensity_sum = rep(FALSE, ncol(filtered_data))} 258 intensity_sum = rep(FALSE, ncol(filtered_data))}
263 259
362 } else {print("4) The input peptide and calibrant m/z were not provided or outside the m/z range")} 358 } else {print("4) The input peptide and calibrant m/z were not provided or outside the m/z range")}
363 359
364 #################### 5) Number of peaks per pixel - image ################## 360 #################### 5) Number of peaks per pixel - image ##################
365 361
366 ## here every intensity value > 0 counts as peak 362 ## here every intensity value > 0 counts as peak
367 peaksperpixel = colSums(spectra(msidata)[]> 0, na.rm=TRUE) 363 peaksperpixel = colSums(spectra(msidata)> 0, na.rm=TRUE)
368 peakscoordarray=cbind(coord(msidata)[,1:2], peaksperpixel) 364 peakscoordarray=cbind(coord(msidata)[,1:2], peaksperpixel)
369 365
370 print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel))+ 366 print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel))+
371 geom_tile() + coord_fixed() + 367 geom_tile() + coord_fixed() +
372 ggtitle("Number of peaks per spectrum")+ 368 ggtitle("Number of peaks per spectrum")+
398 rm(TICcoordarray) 394 rm(TICcoordarray)
399 gc() 395 gc()
400 396
401 ############################### 6b) median int image ############################### 397 ############################### 6b) median int image ###############################
402 398
403 median_int = apply(spectra(msidata)[],2,median) 399 median_int = apply(spectra(msidata),2,median)
404 median_coordarray=cbind(coord(msidata)[,1:2], median_int) 400 median_coordarray=cbind(coord(msidata)[,1:2], median_int)
405 401
406 print(ggplot(median_coordarray, aes(x=x, y=y, fill=median_int))+ 402 print(ggplot(median_coordarray, aes(x=x, y=y, fill=median_int))+
407 geom_tile() + coord_fixed() + 403 geom_tile() + coord_fixed() +
408 ggtitle("Median intensity per spectrum")+ 404 ggtitle("Median intensity per spectrum")+
416 rm(median_coordarray) 412 rm(median_coordarray)
417 gc() 413 gc()
418 414
419 ############################### 6c) max int image ############################### 415 ############################### 6c) max int image ###############################
420 416
421 max_int = apply(spectra(msidata)[],2,max) 417 max_int = apply(spectra(msidata),2,max)
422 max_coordarray=cbind(coord(msidata)[,1:2], max_int) 418 max_coordarray=cbind(coord(msidata)[,1:2], max_int)
423 419
424 print(ggplot(max_coordarray, aes(x=x, y=y, fill=max_int))+ 420 print(ggplot(max_coordarray, aes(x=x, y=y, fill=max_int))+
425 geom_tile() + coord_fixed() + 421 geom_tile() + coord_fixed() +
426 ggtitle("Maximum intensity per spectrum")+ 422 ggtitle("Maximum intensity per spectrum")+
435 gc() 431 gc()
436 432
437 ############################### 7) Most abundant m/z image ################# 433 ############################### 7) Most abundant m/z image #################
438 434
439 ## for each spectrum find the row (m/z) with the highest intensity 435 ## for each spectrum find the row (m/z) with the highest intensity
440 highestmz = apply(spectra(msidata)[],2,which.max) 436 highestmz = apply(spectra(msidata),2,which.max)
441 ## in case for some spectra max returns integer(0), highestmz is a list and integer(0) have to be replaced with NA and unlisted 437 ## in case for some spectra max returns integer(0), highestmz is a list and integer(0) have to be replaced with NA and unlisted
442 if (class(highestmz) == "list"){ 438 if (class(highestmz) == "list"){
443 ##find zero-length values 439 ##find zero-length values
444 zero_entry <- !(sapply(highestmz, length)) 440 zero_entry <- !(sapply(highestmz, length))
445 ### replace these values with NA 441 ### replace these values with NA
475 par(mfrow = c(2,1)) 471 par(mfrow = c(2,1))
476 plot(pca, col=c("black", "darkgrey"), main="PCA for two components") 472 plot(pca, col=c("black", "darkgrey"), main="PCA for two components")
477 image(pca, col=c("black", "white"), strip=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-1)) 473 image(pca, col=c("black", "white"), strip=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-1))
478 474
479 for (PCs in 1:2){ 475 for (PCs in 1:2){
480 print(image(pca, column = c(paste0("PC",PCs)) , superpose = FALSE, col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))} 476 print(image(pca, column = c(paste0("PC",PCs)) , strip=FALSE, superpose = FALSE, main=paste0("PC", PCs), col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))}
481 477
482 ## remove pca to clean up RAM space 478 ## remove pca to clean up RAM space
483 rm(pca) 479 rm(pca)
484 gc() 480 gc()
485 481
573 par(mfrow = c(1, 1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1)) 569 par(mfrow = c(1, 1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
574 hist(mz(msidata), xlab = "m/z", main="Histogram of m/z values") 570 hist(mz(msidata), xlab = "m/z", main="Histogram of m/z values")
575 571
576 ########################## 12) Number of peaks per m/z ##################### 572 ########################## 12) Number of peaks per m/z #####################
577 573
578 peakspermz = rowSums(spectra(msidata)[] > 0, na.rm=TRUE) 574 peakspermz = rowSums(spectra(msidata) > 0, na.rm=TRUE)
579 575
580 par(mfrow = c(2,1), mar=c(5,6,4,4.5)) 576 par(mfrow = c(2,1), mar=c(5,6,4,4.5))
581 ## 12a) scatterplot 577 ## 12a) scatterplot
582 plot_colorByDensity(mz(msidata),peakspermz, main= "Number of peaks per m/z", ylab ="") 578 plot_colorByDensity(mz(msidata),peakspermz, main= "Number of peaks per m/z", ylab ="")
583 title(xlab="m/z", line=2.5) 579 title(xlab="m/z", line=2.5)
592 abline(v=median(peakspermz), col="blue") 588 abline(v=median(peakspermz), col="blue")
593 589
594 ########################## 13) Sum of intensities per m/z ################## 590 ########################## 13) Sum of intensities per m/z ##################
595 591
596 ## Sum of all intensities for each m/z (like TIC, but for m/z instead of pixel) 592 ## Sum of all intensities for each m/z (like TIC, but for m/z instead of pixel)
597 mzTIC = rowSums(spectra(msidata)[], na.rm=TRUE) ## calculate intensity sum for each m/z 593 mzTIC = rowSums(spectra(msidata), na.rm=TRUE) ## calculate intensity sum for each m/z
598 594
599 par(mfrow = c(2,1), mar=c(5,6,4,2)) 595 par(mfrow = c(2,1), mar=c(5,6,4,2))
600 ## 13a) scatterplot 596 ## 13a) scatterplot
601 plot_colorByDensity(mz(msidata),mzTIC, main= "Sum of intensities per m/z", ylab ="") 597 plot_colorByDensity(mz(msidata),mzTIC, main= "Sum of intensities per m/z", ylab ="")
602 title(xlab="m/z", line=2.5) 598 title(xlab="m/z", line=2.5)
623 title(ylab="Median spectrum intensity", line=4) 619 title(ylab="Median spectrum intensity", line=4)
624 if (!is.null(levels(msidata\$annotation))){ 620 if (!is.null(levels(msidata\$annotation))){
625 abline(v=abline_vector, lty = 3)} 621 abline(v=abline_vector, lty = 3)}
626 622
627 ## 14b) histogram: 623 ## 14b) histogram:
628 hist(spectra(msidata)[], main="", xlab = "", ylab="", las=1) 624 hist(spectra(msidata), main="", xlab = "", ylab="", las=1)
629 title(main="Intensity histogram", line=2) 625 title(main="Intensity histogram", line=2)
630 title(xlab="intensities") 626 title(xlab="intensities")
631 title(ylab="Frequency", line=4) 627 title(ylab="Frequency", line=4)
632 abline(v=median(spectra(msidata)[(spectra(msidata)>0)], na.rm=TRUE), col="blue") 628 abline(v=median(spectra(msidata)[(spectra(msidata)>0)], na.rm=TRUE), col="blue")
633 629
662 mean_matrix = matrix(,ncol=0, nrow = nrow(msidata)) 658 mean_matrix = matrix(,ncol=0, nrow = nrow(msidata))
663 for (subsample in levels(msidata\$annotation)){ 659 for (subsample in levels(msidata\$annotation)){
664 mean_mz_sample = rowMeans(spectra(msidata)[,msidata\$annotation==subsample],na.rm=TRUE) 660 mean_mz_sample = rowMeans(spectra(msidata)[,msidata\$annotation==subsample],na.rm=TRUE)
665 mean_matrix = cbind(mean_matrix, mean_mz_sample)} 661 mean_matrix = cbind(mean_matrix, mean_mz_sample)}
666 662
667 boxplot(mean_matrix, ylab = "Mean intensity per m/z", main="Mean m/z intensities per annotation group", xaxt = "n") 663 boxplot(log10(mean_matrix), ylab = "Log10 mean intensity per m/z", main="Log10 mean m/z intensities per annotation group", xaxt = "n")
668 (axis(1, at = c(1:number_combined), labels=levels(msidata\$annotation), las=2)) 664 (axis(1, at = c(1:number_combined), labels=levels(msidata\$annotation), las=2))
669 665
670 ## 14e) Heatmap of pearson correlation on mean intensities between annotation groups 666 ## 14e) Heatmap of pearson correlation on mean intensities between annotation groups
671 667
672 corr_matrix = mean_matrix 668 corr_matrix = mean_matrix
690 686
691 ############################ 15) Mass spectra ############################## 687 ############################ 15) Mass spectra ##############################
692 688
693 ## replace any NA with 0, otherwise plot function will not work at all 689 ## replace any NA with 0, otherwise plot function will not work at all
694 msidata_no_NA = msidata 690 msidata_no_NA = msidata
695 spectra(msidata_no_NA)[is.na(spectra(msidata_no_NA)[])] = 0 691 spectra(msidata_no_NA)[is.na(spectra(msidata_no_NA))] = 0
696 692
697 ## find three equal m/z ranges for the average mass spectra plots: 693 ## find three equal m/z ranges for the average mass spectra plots:
698 third_mz_range = nrow(msidata_no_NA)/3 694 third_mz_range = nrow(msidata_no_NA)/3
699 695
700 par(mfrow = c(2, 2), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1)) 696 par(mfrow = c(2, 2), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
701 plot(msidata_no_NA, pixel = 1:ncol(msidata_no_NA), main= "Average spectrum") 697 plot(msidata_no_NA, pixel = 1:ncol(msidata_no_NA), main= "Average spectrum")
702 plot(msidata_no_NA[1:third_mz_range,], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum") 698 plot(msidata_no_NA[1:third_mz_range,], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum")
703 plot(msidata_no_NA[third_mz_range:(2*third_mz_range),], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum") 699 plot(msidata_no_NA[third_mz_range:(2*third_mz_range),], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum")
704 plot(msidata_no_NA[(2*third_mz_range):nrow(msidata_no_NA),], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum") 700 plot(msidata_no_NA[(2*third_mz_range):nrow(msidata_no_NA),], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum")
701
702 ## plot one average mass spectrum for each pixel annotation group
703
704 if (!is.null(levels(msidata\$annotation))){
705 ## print legend only for less than 10 samples
706 if (length(levels(msidata\$annotation)) < 10){
707 key_legend = TRUE
708 }else{key_legend = FALSE}
709 par(mfrow = c(1,1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
710 plot(msidata, pixel=1:ncol(msidata), pixel.groups=msidata\$annotation, key=key_legend, col=hue_pal()(length(levels(msidata\$annotation))),superpose=TRUE, main="Average mass spectra for annotation groups")
711 }
705 712
706 ## plot 4 random mass spectra 713 ## plot 4 random mass spectra
707 ## find four random pixel to plot their spectra in the following plots: 714 ## find four random pixel to plot their spectra in the following plots:
708 pixel1 = sample(pixelnumber,1) 715 pixel1 = sample(pixelnumber,1)
709 pixel2 = sample(pixelnumber,1) 716 pixel2 = sample(pixelnumber,1)
1088 **Intensity plots** 1095 **Intensity plots**
1089 1096
1090 - Median intensity per spectrum: Scatter plot in which each point represents the median intensity for one spectrum. Dotted lines in the scatter plot separate spectra of different annotation groups. 1097 - Median intensity per spectrum: Scatter plot in which each point represents the median intensity for one spectrum. Dotted lines in the scatter plot separate spectra of different annotation groups.
1091 - Histogram of intensities. 1098 - Histogram of intensities.
1092 - (annot) Intensities per annotation group: Same histogram as before but with colours to show the contribution of each pixel annotation group. 1099 - (annot) Intensities per annotation group: Same histogram as before but with colours to show the contribution of each pixel annotation group.
1093 - (annot) Mean intensities per m/z and annotation group: For all pixels of an annotation group the mean intensity for each m/z is calculated and shown as boxplot. 1100 - (annot) Log10 mean intensities per m/z and annotation group: For all pixels of an annotation group the log10 mean intensity for each m/z is calculated and shown as boxplot.
1094 - (annot) Pearson correlation between annotation groups (needs at least 2 groups) based on mean intensities and shown as heatmap. 1101 - (annot) Pearson correlation between annotation groups (needs at least 2 groups) based on mean intensities and shown as heatmap.
1095 1102
1096 **Mass spectra and m/z accuracy** 1103 **Mass spectra and m/z accuracy**
1097 1104
1098 - Average mass spectra: First plot shows the average spectrum over the full m/z range, the other three plots zoom into the m/z axis. 1105 - Average mass spectra: First plot shows the average spectrum over the full m/z range, the other three plots zoom into the m/z axis.
1106 - (annot) Average mass spectrum per annotation group.
1107 - Random mass spectra: The mass spectra for four random pixel are plotted.
1099 - (cal) For each calibrant four zoomed average mass spectrum are drawn with different zooming level. The theoretical calibrant m/z (taken from the input file) is represented by the dashed blue line. The dotted blue lines show the given ppm range. The green line is the m/z value that is closest to the theoretical calibrant and the red line is the m/z with the highest average intensity in the m/z window. In the second spectrum each blue dot indicates one data point. 1108 - (cal) For each calibrant four zoomed average mass spectrum are drawn with different zooming level. The theoretical calibrant m/z (taken from the input file) is represented by the dashed blue line. The dotted blue lines show the given ppm range. The green line is the m/z value that is closest to the theoretical calibrant and the red line is the m/z with the highest average intensity in the m/z window. In the second spectrum each blue dot indicates one data point.
1100 - (annot) Average spectrum per annotation group: For each calibrant a zoomed in mass spectrum is plotted this time with the average intensities for each annotation group separately. 1109 - (annot) Average spectrum per annotation group: For each calibrant a zoomed in mass spectrum is plotted this time with the average intensities for each annotation group separately.
1101 - (cal) Difference m/z with max. average intensity vs. theor. calibrant m/z: The difference in ppm between the m/z with the highest average intensity and the theoretical m/z are plotted for each calibrant. This corresponds to the difference between the dashed blue line and the red line in the zoomed in mass spectra. 1110 - (cal) Difference m/z with max. average intensity vs. theor. calibrant m/z: The difference in ppm between the m/z with the highest average intensity and the theoretical m/z are plotted for each calibrant. This corresponds to the difference between the dashed blue line and the red line in the zoomed in mass spectra.
1102 - (cal) Difference closest measured m/z vs. theor. calibrant m/z: The difference in ppm between the closest m/z value and the theoretical m/z values are plotted for each calibrant. This corresponds to the difference between the dashed blue line and the green line in the zoomed in mass spectra. 1111 - (cal) Difference closest measured m/z vs. theor. calibrant m/z: The difference in ppm between the closest m/z value and the theoretical m/z values are plotted for each calibrant. This corresponds to the difference between the dashed blue line and the green line in the zoomed in mass spectra.
1103 - (cal) Difference m/z with max. average intensity vs. theor. m/z (per spectrum): For each spectrum the ppm difference between the m/z with the highest average intensity and the theoretical m/z are plotted. The calibrants have different plotting colours. Dashed lines separate spectra of different annotation groups. 1112 - (cal) Difference m/z with max. average intensity vs. theor. m/z (per spectrum): For each spectrum the ppm difference between the m/z with the highest average intensity and the theoretical m/z are plotted. The calibrants have different plotting colours. Dashed lines separate spectra of different annotation groups.