cardinal_quality_report: quality_report.xml comparison

comparison quality_report.xml @ 4:3b7a35d50ebf draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit ecdc3a64aa245d80dbc5487b2bf10a85a43adc6d

author	galaxyp
date	Fri, 22 Mar 2019 08:11:43 -0400
parents	16556ca0196b
children	f0d1f3e97303

comparison

equal deleted inserted replaced

-:16556ca0196b
+:3b7a35d50ebf
-<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.2">
+<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.3">
 <description>
 mass spectrometry imaging QC
 </description>
 <macros>
 <import>macros.xml</import>
 library(pheatmap)
 @READING_MSIDATA_INRAM@
-## create full matrix to make processed imzML files compatible with segmentation and other steps
-iData(msidata) <- iData(msidata)[]
 ## remove duplicated coordinates
-print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed"))
 msidata <- msidata[,!duplicated(coord(msidata))]
 ## optional annotation from tabular file to obtain pixel groups (otherwise all pixels are considered to be one sample)
 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
 ###################### calculation of data properties ################################
 @DATA_PROPERTIES_INRAM@
 ## Median intensities
-medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
+medint = round(median(spectra(msidata), na.rm=TRUE), digits=2)
 ## Spectra multiplied with m/z (potential number of peaks)
 numpeaks = ncol(msidata)*nrow(msidata)
 ## Percentage of intensities > 0
 percpeaks = round(npeaks/numpeaks*100, digits=2)
 ## Number of empty TICs
-TICs = colSums(spectra(msidata)[], na.rm=TRUE)
+TICs = colSums(spectra(msidata), na.rm=TRUE)
 NumemptyTIC = sum(TICs == 0)
 ## Median and sd TIC
 medTIC = round(median(TICs), digits=1)
 sdTIC = round(sd(TICs), digits=0)
 ## Median and sd # peaks per spectrum
-medpeaks = round(median(colSums(spectra(msidata)[]>0, na.rm=TRUE), na.rm=TRUE), digits=0)
+medpeaks = round(median(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0)
-sdpeaks = round(sd(colSums(spectra(msidata)[]>0, na.rm=TRUE), na.rm=TRUE), digits=0)
+sdpeaks = round(sd(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0)
 ## Processing information
 centroidedinfo = centroided(msidata)
 ############## Read and filter tabular file with m/z ###########################
 for (mass in 1:length(inputcalibrantmasses)){
 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
-if (nrow(filtered_data) > 1 & sum(spectra(filtered_data)[],na.rm=TRUE) > 0){
+if (nrow(filtered_data) > 1 & sum(spectra(filtered_data),na.rm=TRUE) > 0){
 ## intensity of all m/z > 0
-intensity_sum = colSums(spectra(filtered_data)[], na.rm=TRUE) > 0
+intensity_sum = colSums(spectra(filtered_data), na.rm=TRUE) > 0
-}else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data)[], na.rm=TRUE) > 0){
+}else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data), na.rm=TRUE) > 0){
 ## intensity of only m/z > 0
-intensity_sum = spectra(filtered_data)[] > 0
+intensity_sum = spectra(filtered_data) > 0
 }else{
 intensity_sum = rep(FALSE, ncol(filtered_data))}
 } else {print("4) The input peptide and calibrant m/z were not provided or outside the m/z range")}
 #################### 5) Number of peaks per pixel - image ##################
 ## here every intensity value > 0 counts as peak
-peaksperpixel = colSums(spectra(msidata)[]> 0, na.rm=TRUE)
+peaksperpixel = colSums(spectra(msidata)> 0, na.rm=TRUE)
 peakscoordarray=cbind(coord(msidata)[,1:2], peaksperpixel)
 print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel))+
 geom_tile() + coord_fixed() +
 ggtitle("Number of peaks per spectrum")+
 rm(TICcoordarray)
 gc()
 ############################### 6b) median int image ###############################
-median_int = apply(spectra(msidata)[],2,median)
+median_int = apply(spectra(msidata),2,median)
 median_coordarray=cbind(coord(msidata)[,1:2], median_int)
 print(ggplot(median_coordarray, aes(x=x, y=y, fill=median_int))+
 geom_tile() + coord_fixed() +
 ggtitle("Median intensity per spectrum")+
 rm(median_coordarray)
 gc()
 ############################### 6c) max int image ###############################
-max_int = apply(spectra(msidata)[],2,max)
+max_int = apply(spectra(msidata),2,max)
 max_coordarray=cbind(coord(msidata)[,1:2], max_int)
 print(ggplot(max_coordarray, aes(x=x, y=y, fill=max_int))+
 geom_tile() + coord_fixed() +
 ggtitle("Maximum intensity per spectrum")+
 gc()
 ############################### 7) Most abundant m/z image #################
 ## for each spectrum find the row (m/z) with the highest intensity
-highestmz = apply(spectra(msidata)[],2,which.max)
+highestmz = apply(spectra(msidata),2,which.max)
 ## in case which.max returns integer(0) for some spectra, highestmz is a list; the integer(0) entries have to be replaced with NA and the list has to be unlisted
 if (class(highestmz) == "list"){
 ##find zero-length values
 zero_entry <- !(sapply(highestmz, length))
 ### replace these values with NA
 par(mfrow = c(2,1))
 plot(pca, col=c("black", "darkgrey"), main="PCA for two components")
 image(pca, col=c("black", "white"), strip=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-1))
 for (PCs in 1:2){
-print(image(pca, column = c(paste0("PC",PCs)) , superpose = FALSE, col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))}
+print(image(pca, column = c(paste0("PC",PCs)) , strip=FALSE, superpose = FALSE, main=paste0("PC", PCs), col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))}
 ## remove pca to clean up RAM space
 rm(pca)
 gc()
 par(mfrow = c(1, 1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
 hist(mz(msidata), xlab = "m/z", main="Histogram of m/z values")
 ########################## 12) Number of peaks per m/z #####################
-peakspermz = rowSums(spectra(msidata)[] > 0, na.rm=TRUE)
+peakspermz = rowSums(spectra(msidata) > 0, na.rm=TRUE)
 par(mfrow = c(2,1), mar=c(5,6,4,4.5))
 ## 12a) scatterplot
 plot_colorByDensity(mz(msidata),peakspermz, main= "Number of peaks per m/z", ylab ="")
 title(xlab="m/z", line=2.5)
 abline(v=median(peakspermz), col="blue")
 ########################## 13) Sum of intensities per m/z ##################
 ## Sum of all intensities for each m/z (like TIC, but for m/z instead of pixel)
-mzTIC = rowSums(spectra(msidata)[], na.rm=TRUE) ## calculate intensity sum for each m/z
+mzTIC = rowSums(spectra(msidata), na.rm=TRUE) ## calculate intensity sum for each m/z
 par(mfrow = c(2,1), mar=c(5,6,4,2))
 ## 13a) scatterplot
 plot_colorByDensity(mz(msidata),mzTIC,  main= "Sum of intensities per m/z", ylab ="")
 title(xlab="m/z", line=2.5)
 title(ylab="Median spectrum intensity", line=4)
 if (!is.null(levels(msidata\$annotation))){
 abline(v=abline_vector, lty = 3)}
 ## 14b) histogram:
-hist(spectra(msidata)[], main="", xlab = "", ylab="", las=1)
+hist(spectra(msidata), main="", xlab = "", ylab="", las=1)
 title(main="Intensity histogram", line=2)
 title(xlab="intensities")
 title(ylab="Frequency", line=4)
 abline(v=median(spectra(msidata)[(spectra(msidata)>0)], na.rm=TRUE), col="blue")
 mean_matrix = matrix(,ncol=0, nrow = nrow(msidata))
 for (subsample in levels(msidata\$annotation)){
 mean_mz_sample = rowMeans(spectra(msidata)[,msidata\$annotation==subsample],na.rm=TRUE)
 mean_matrix = cbind(mean_matrix, mean_mz_sample)}
-boxplot(mean_matrix, ylab = "Mean intensity per m/z", main="Mean m/z intensities per annotation group", xaxt = "n")
+boxplot(log10(mean_matrix), ylab = "Log10 mean intensity per m/z", main="Log10 mean m/z intensities per annotation group", xaxt = "n")
 (axis(1, at = c(1:number_combined), labels=levels(msidata\$annotation), las=2))
 ## 14e) Heatmap of pearson correlation on mean intensities between annotation groups
 corr_matrix = mean_matrix
 ############################ 15) Mass spectra ##############################
 ## replace any NA with 0, otherwise plot function will not work at all
 msidata_no_NA = msidata
-spectra(msidata_no_NA)[is.na(spectra(msidata_no_NA)[])] = 0
+spectra(msidata_no_NA)[is.na(spectra(msidata_no_NA))] = 0
 ## find three equal m/z ranges for the average mass spectra plots:
 third_mz_range = nrow(msidata_no_NA)/3
 par(mfrow = c(2, 2), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
 plot(msidata_no_NA, pixel = 1:ncol(msidata_no_NA), main= "Average spectrum")
 plot(msidata_no_NA[1:third_mz_range,], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum")
 plot(msidata_no_NA[third_mz_range:(2*third_mz_range),], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum")
 plot(msidata_no_NA[(2*third_mz_range):nrow(msidata_no_NA),], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum")
+## plot one average mass spectrum for each pixel annotation group
+if (!is.null(levels(msidata\$annotation))){
+## print legend only for less than 10 samples
+if (length(levels(msidata\$annotation)) < 10){
+key_legend = TRUE
+}else{key_legend = FALSE}
+par(mfrow = c(1,1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
+plot(msidata, pixel=1:ncol(msidata), pixel.groups=msidata\$annotation, key=key_legend, col=hue_pal()(length(levels(msidata\$annotation))),superpose=TRUE, main="Average mass spectra for annotation groups")
+}
 ## plot 4 random mass spectra
 ## find four random pixels to plot their spectra in the following plots:
 pixel1 = sample(pixelnumber,1)
 pixel2 = sample(pixelnumber,1)
 **Intensity plots**
 - Median intensity per spectrum: Scatter plot in which each point represents the median intensity for one spectrum. Dotted lines in the scatter plot separate spectra of different annotation groups.
 - Histogram of intensities.
 - (annot) Intensities per annotation group: Same histogram as before but with colours to show the contribution of each pixel annotation group.
-- (annot) Mean intensities per m/z and annotation group: For all pixels of an annotation group the mean intensity for each m/z is calculated and shown as boxplot.
+- (annot) Log10 mean intensities per m/z and annotation group: For all pixels of an annotation group the log10 mean intensity for each m/z is calculated and shown as boxplot.
 - (annot) Pearson correlation between annotation groups (needs at least 2 groups) based on mean intensities and shown as heatmap.
 **Mass spectra and m/z accuracy**
 - Average mass spectra: First plot shows the average spectrum over the full m/z range, the other three plots zoom into the m/z axis.
+- (annot) Average mass spectrum per annotation group.
+- Random mass spectra: The mass spectra of four random pixels are plotted.
 - (cal) For each calibrant, four zoomed average mass spectra are drawn with different zoom levels. The theoretical calibrant m/z (taken from the input file) is represented by the dashed blue line. The dotted blue lines show the given ppm range. The green line is the m/z value that is closest to the theoretical calibrant and the red line is the m/z with the highest average intensity in the m/z window. In the second spectrum each blue dot indicates one data point.
 - (annot) Average spectrum per annotation group: For each calibrant, a zoomed-in mass spectrum is plotted, this time with the average intensities for each annotation group separately.
 - (cal) Difference m/z with max. average intensity vs. theor. calibrant m/z: The difference in ppm between the m/z with the highest average intensity and the theoretical m/z is plotted for each calibrant. This corresponds to the difference between the dashed blue line and the red line in the zoomed-in mass spectra.
 - (cal) Difference closest measured m/z vs. theor. calibrant m/z: The difference in ppm between the closest m/z value and the theoretical m/z value is plotted for each calibrant. This corresponds to the difference between the dashed blue line and the green line in the zoomed-in mass spectra.
 - (cal) Difference m/z with max. average intensity vs. theor. m/z (per spectrum): For each spectrum, the ppm difference between the m/z with the highest average intensity and the theoretical m/z is plotted. The calibrants have different plotting colours. Dashed lines separate spectra of different annotation groups.

Mercurial > repos > galaxyp > cardinal_quality_report

comparison quality_report.xml @ 4:3b7a35d50ebf draft