cardinal_quality_report: quality_report.xml comparison

comparison quality_report.xml @ 2:d4803c1e5e19 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit f127be2141cf22e269c85282d226eb16fe14a9c1

author	galaxyp
date	Fri, 15 Feb 2019 10:21:09 -0500
parents	ae9ffc7ba261
children	16556ca0196b

comparison

equal deleted inserted replaced

-:ae9ffc7ba261
+:d4803c1e5e19
-<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.1">
+<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.2">
 <description>
 mass spectrometry imaging QC
 </description>
 <macros>
 <import>macros.xml</import>
 library(KernSmooth)
 library(scales)
 library(pheatmap)
-@READING_MSIDATA@
+@READING_MSIDATA_INRAM@
+## create full matrix to make processed imzML files compatible with segmentation and other steps
+iData(msidata) <- iData(msidata)[]
 ## remove duplicated coordinates
 print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed"))
 msidata <- msidata[,!duplicated(coord(msidata))]
-## create full matrix to make processed imzML files compatible with segmentation and other steps
-iData(msidata) <- iData(msidata)[]
 ## optional annotation from tabular file to obtain pixel groups (otherwise all pixels are considered to be one sample)
 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
 msidata\$annotation = as.factor(merged_annotation[,4])
 #end if
 ###################### calculation of data properties ################################
-@DATA_PROPERTIES@
+@DATA_PROPERTIES_INRAM@
 ## Median intensities
 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
 ## Spectra multiplied with m/z (potential number of peaks)
-numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
+numpeaks = ncol(msidata)*nrow(msidata)
 ## Percentage of intensities > 0
 percpeaks = round(npeaks/numpeaks*100, digits=2)
 ## Number of empty TICs
 TICs = colSums(spectra(msidata)[], na.rm=TRUE)
 NumemptyTIC = sum(TICs == 0)
 medTIC = round(median(TICs), digits=1)
 sdTIC = round(sd(TICs), digits=0)
 ## Median and sd # peaks per spectrum
 medpeaks = round(median(colSums(spectra(msidata)[]>0, na.rm=TRUE), na.rm=TRUE), digits=0)
 sdpeaks = round(sd(colSums(spectra(msidata)[]>0, na.rm=TRUE), na.rm=TRUE), digits=0)
-print(cor(TICs,colSums(spectra(msidata)[]>0), method="pearson"))
 ## Processing information
 centroidedinfo = centroided(msidata)
 ############## Read and filter tabular file with m/z ###########################
-### reading calibrant file:
+### reading m/z input (calibrant) file:
 #if $calibrant_file:
-calibrant_list = read.delim("$calibrant_file", header = $calibrant_header, na.strings=c("","NA"), stringsAsFactors = FALSE)
+calibrant_list = read.delim("$calibrant_file", header = $calibrant_header, na.strings=c(" ","","NA"), stringsAsFactors = FALSE)
 calibrant_list = calibrant_list[,c($mz_column, $name_column)]
 ### calculate how many input calibrant m/z are valid:
 inputcalibrants = calibrant_list[calibrant_list[,1]>minmz & calibrant_list[,1]<maxmz,]
 "Intensities > 0",
 "Number of empty spectra",
 "Median TIC ± sd",
 "Median # peaks per spectrum ± sd",
 "Centroided",
-paste0("calibrants (#valid/#input) in \n", "$calibrant_file.display_name"))
+paste0("input m/z (#valid/#input) in \n", "$calibrant_file.display_name"))
 values2 = c(paste0(medint),
 paste0(percpeaks, " %"),
 paste0(NumemptyTIC),
 paste0(medTIC, " ± ", sdTIC),
 ####################### II) x-y images #######################################
 ##############################################################################
 print("x-y images")
 ## only do plots for file with intensity peaks
 if (npeaks > 0){
 ## function for density plots
 plot_colorByDensity = function(x1,x2,
 ylim=c(min(x2),max(x2)),
 xlim=c(min(x1),max(x1)),
 xlab="",ylab="",main=""){
 ## for each annotation group find the last pixel; dashed lines are drawn at these positions in the plots over spectra index
 pixel_name_df = data.frame(pixels(msidata), msidata\$annotation)
 colnames(pixel_name_df) = c("pixel_number", "pixel_name")
 last_pixel = aggregate(pixel_number~pixel_name, data = pixel_name_df, max)
 pixel_vector = last_pixel[,2]
-abline_vector = pixel_vector[1:number_combined-1]
+abline_vector = pixel_vector
-print(abline_vector)
+## remove position_df to clean up RAM space
+rm(position_df)
+gc()
 }
 ################### 1) Pixel order image ###################################
 pixelnumber = 1:pixelcount
 geom_tile() + coord_fixed()+
 ggtitle(gg_title) + theme_bw()+
 theme(plot.title = element_text(hjust = 0.5))+
 theme(text=element_text(family="ArialMT", face="bold", size=12))+
 scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"),
 space = "Lab", na.value = "black", name = "Pixel\nnumber"))
+## remove pixelxyarray to clean up RAM space
+rm(pixelxyarray)
+gc()
 ################ 2) Number of calibrants per spectrum ######################
 ## matrix with calibrants in columns and in rows if there is peak intensity in range or not
 pixelmatrix = matrix(ncol=ncol(msidata), nrow = 0)
 }
 ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE)
 countvector= as.factor(colSums(pixelmatrix, na.rm=TRUE))
 countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts
-mycolours = c("black","grey", "darkblue", "blue", "green" , "red", "yellow", "magenta", "olivedrab1", "lightseagreen")
+mycolours = brewer.pal(9, "Set1")
 print(ggplot(countdf, aes(x=x, y=y, fill=countvector))+
 geom_tile() + coord_fixed() +
 ggtitle(paste0("Number of calibrants per pixel (±",$plusminus_ppm, " ppm)")) +
 theme_bw() +
 theme(plot.title = element_text(hjust = 0.5))+
 theme(text=element_text(family="ArialMT", face="bold", size=12))+
 scale_fill_manual(values = mycolours[1:length(countvector)],
 na.value = "black", name = "# calibrants"))
+## remove countdf to clean up RAM space
+rm(countdf)
+gc()
 }else{print("2) The inputcalibrant m/z were not provided or outside the m/z range")}
 ########################## 3) fold change image ###########################
 #if $calibrantratio:
 #for $foldchanges in $calibrantratio:
 mass1 = $foldchanges.mass1
 mass2 = $foldchanges.mass2
-distance = $foldchanges.distance
+distance1 = $foldchanges.distance/1000000 * mass1
+distance2 = $foldchanges.distance/1000000 * mass2
 ### if user did not write a label use input m/z as label
 #if not str($foldchanges.filenameratioplot).strip():
-#set $label = "Fold change %s Da / %s Da" % ($foldchanges.mass1, $foldchanges.mass2)
+#set $label = "log2 fold change %s Da / %s Da" % ($foldchanges.mass1, $foldchanges.mass2)
 #else:
 #set $label = $foldchanges.filenameratioplot
 #end if
 ### filter msidata for given m/z range (for both input m/z)
-filtered_data1 = msidata[mz(msidata) >= mass1-distance & mz(msidata) <= mass1+distance,]
+filtered_data1 = msidata[mz(msidata) >= mass1-distance1 & mz(msidata) <= mass1+distance1,]
-filtered_data2 = msidata[mz(msidata) >= mass2-distance & mz(msidata) <= mass2+distance,]
+filtered_data2 = msidata[mz(msidata) >= mass2-distance2 & mz(msidata) <= mass2+distance2,]
-### find m/z in the two given ranges with the highest mean intensity
-### this two m/z will be used to calculate the fold change (red line in plot)
-maxmassrow1 = rowMeans(spectra(filtered_data1), na.rm=TRUE)
-maxmass1 = mz(filtered_data1)[which.max(maxmassrow1)]
-maxmassrow2 = rowMeans(spectra(filtered_data2), na.rm=TRUE)
-maxmass2 = mz(filtered_data2)[which.max(maxmassrow2)]
-### plot legend: chosen value in blue, distance in blue, max m/z in red
 ### m/z range for each plot (fixed range of 5 Da)
 ### xlim does not work because it does not adjust for the max. intensities within the range
 mzdown1 = features(msidata, mz = mass1-2)
 mzup1 = features(msidata, mz = mass1+3)
 mzdown2 = features(msidata, mz = mass2-2)
 mzup2 = features(msidata, mz = mass2+3)
 ### plot for first m/z
 par(mfrow=c(2,1), oma=c(0,0,2,0))
 plot(msidata[mzdown1:mzup1,], pixel = 1:pixelcount, main=paste0("Average spectrum ", mass1, " Da"))
-abline(v=c(mass1-distance, mass1, mass1+distance), col="blue",lty=c(3,6,3))
+abline(v=c(mass1-distance1, mass1, mass1+distance1), col="blue",lty=c(3,6,3))
-abline(v=maxmass1, col="red", lty=5)
 ### plot for second m/z
 plot(msidata[mzdown2:mzup2,], pixel = 1:pixelcount, main= paste0("Average spectrum ", mass2, " Da"))
-abline(v=c(mass2-distance, mass2, mass2+distance), col="blue", lty=c(3,6,3))
+abline(v=c(mass2-distance2, mass2, mass2+distance2), col="blue", lty=c(3,6,3))
-abline(v=maxmass2, col="red", lty=5)
 title("Control of fold change plot", outer=TRUE)
 ### filter spectra for max m/z to have two vectors, which can be divided
 ### plot spatial distribution of fold change
-### only possible when there are intensities > 0 in both given m/z ranges
+## calculate mean intensity for each m/z over the ppm range; then calculate log2 foldchange
-if (length(maxmass1)>0&length(maxmass2)>0){
+mass1vector = colMeans(spectra(filtered_data1), na.rm =TRUE)
-mass1vector = spectra(msidata)[features(msidata, mz = maxmass1),]
+mass2vector = colMeans(spectra(filtered_data2), na.rm = TRUE)
-mass2vector = spectra(msidata)[features(msidata, mz = maxmass2),]
+foldchange= log2(mass1vector/mass2vector)
-foldchange= log2(mass1vector/mass2vector)
+fcmatrix = cbind(foldchange, coord(msidata)[,1:2])
-fcmatrix = cbind(foldchange, coord(msidata)[,1:2])
+print(ggplot(fcmatrix, aes(x=x, y=y, fill=foldchange))+
-print(ggplot(fcmatrix, aes(x=x, y=y, fill=foldchange), colour=colo)+
+geom_tile() + coord_fixed()+
-geom_tile() + coord_fixed()+
+ggtitle("$label")+
-ggtitle("$label")+
+theme_bw()+
-theme_bw()+
+theme(plot.title = element_text(hjust = 0.5))+
-theme(plot.title = element_text(hjust = 0.5))+
+theme(text=element_text(family="ArialMT", face="bold", size=12))+
-theme(text=element_text(family="ArialMT", face="bold", size=12))+
+scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
-scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
+,space = "Lab", na.value = "black", name ="FC"))
-,space = "Lab", na.value = "black", name ="FC"))
-}else{
+## remove FC files to clean up RAM space
-plot(0,type='n',axes=FALSE,ann=FALSE)
+rm(fcmatrix)
-title(main=paste("At least one m/z range did not contain any intensity > 0,\n therefore no foldchange plot could be drawn"))}
+rm(filtered_data1)
+rm(filtered_data2)
+gc()
 #end for
 #end if
 #################### 4) m/z heatmaps #######################################
 for (mass in 1:length(inputcalibrants[,1])){
 image(msidata, mz=inputcalibrants[,1][mass], plusminus=plusminusvalues[mass],
 main= paste0(inputcalibrants[,2][mass], ": ", round(inputcalibrants[,1][mass], digits = 2)," (±",$plusminus_ppm, " ppm)"),
-contrast.enhance = "histogram", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy))
+contrast.enhance = "histogram", ylim= c(maximumy+0.2*maximumy,minimumy-1))
 }
 } else {print("4) The input peptide and calibrant m/z were not provided or outside the m/z range")}
 #################### 5) Number of peaks per pixel - image ##################
 theme(plot.title = element_text(hjust = 0.5))+
 theme(text=element_text(family="ArialMT", face="bold", size=12))+
 scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
 ,space = "Lab", na.value = "black", name = "# peaks"))
+## remove peakscoordarray to clean up RAM space
+rm(peakscoordarray)
+gc()
 ############################### 6) TIC image ###############################
 TICcoordarray=cbind(coord(msidata)[,1:2], TICs)
 theme(plot.title = element_text(hjust = 0.5))+
 theme(text=element_text(family="ArialMT", face="bold", size=12))+
 scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
 ,space = "Lab", na.value = "black", name = "TIC"))
+## remove TICcoordarray to clean up RAM space
+rm(TICcoordarray)
+gc()
 ############################### 6b) median int image ###############################
 median_int = apply(spectra(msidata)[],2,median)
 median_coordarray=cbind(coord(msidata)[,1:2], median_int)
 print(ggplot(median_coordarray, aes(x=x, y=y, fill=median_int))+
 geom_tile() + coord_fixed() +
-ggtitle("Median intensity per pixel")+
+ggtitle("Median intensity per spectrum")+
 theme_bw() +
 theme(plot.title = element_text(hjust = 0.5))+
 theme(text=element_text(family="ArialMT", face="bold", size=12))+
 scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
 ,space = "Lab", na.value = "black", name = "median\nintensity"))
+## remove median_coordarray to clean up RAM space
+rm(median_coordarray)
+gc()
+############################### 6c) max int image ###############################
+max_int = apply(spectra(msidata)[],2,max)
+max_coordarray=cbind(coord(msidata)[,1:2], max_int)
+print(ggplot(max_coordarray, aes(x=x, y=y, fill=max_int))+
+geom_tile() + coord_fixed() +
+ggtitle("Maximum intensity per spectrum")+
+theme_bw() +
+theme(plot.title = element_text(hjust = 0.5))+
+theme(text=element_text(family="ArialMT", face="bold", size=12))+
+scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
+,space = "Lab", na.value = "black", name = "max\nintensity"))
+## remove max_coordarray to clean up RAM space
+rm(max_coordarray)
+gc()
 ############################### 7) Most abundant m/z image #################
-highestmz = apply(spectra(msidata)[],2,which.max)
+## for each spectrum find the row (m/z) with the highest intensity
+highestmz = apply(spectra(msidata)[],2,which.max)
+## if which.max returns integer(0) for some spectra, highestmz is a list; the integer(0) entries have to be replaced with NA before unlisting
+if (class(highestmz) == "list"){
+##find zero-length values
+zero_entry <- !(sapply(highestmz, length))
+### replace these values with NA
+highestmz[zero_entry] <- NA
+### unlist list to get a vector
+highestmz = unlist(highestmz)}
 highestmz_matrix = cbind(coord(msidata)[,1:2],mz(msidata)[highestmz])
 colnames(highestmz_matrix)[3] = "highestmzinDa"
 print(ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa))+
 geom_tile() + coord_fixed() +
 theme(plot.title = element_text(hjust = 0.5))+
 scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z",
 limits=c(min(highestmz_matrix\$highestmzinDa), max(highestmz_matrix\$highestmzinDa)))+
 theme(text=element_text(family="ArialMT", face="bold", size=12)))
+## remove highestmz_matrix to clean up RAM space
+rm(highestmz_matrix)
+gc()
 ########################## 8) optional pca image for two components #################
 #if $do_pca:
 set.seed(1)
 pca = PCA(msidata, ncomp=2)
+## plot PCA overview plot and image, then separate images for PC1 and PC2
 par(mfrow = c(2,1))
 plot(pca, col=c("black", "darkgrey"), main="PCA for two components")
-image(pca, col=c("black", "white"), strip=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy))
+image(pca, col=c("black", "white"), strip=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-1))
+for (PCs in 1:2){
+print(image(pca, column = c(paste0("PC",PCs)) , superpose = FALSE, col.regions = risk.colors(100), ylim=c(maximumy+2, minimumy-2)))}
+## remove pca to clean up RAM space
+rm(pca)
+gc()
 #end if
 ################## III) properties over spectra index ######################
 ############################################################################
 ########################## 10) TIC per spectrum ###########################
 ## 10a)density scatterplot
 par(mfrow = c(2,1), mar=c(5,6,4,2))
-plot_colorByDensity(pixels(msidata), TICs,  ylab = "", xlab = "", main="TIC per spectrum")
+## plot_colorByDensity does not work when all TICs are identical (e.g. after TIC normalization), therefore make a normal plot
+if (min(TICs) == max(TICs)){
+plot(pixels(msidata), TICs, ylab = "", xlab = "", pch=20, main="TIC per spectrum", col="#FF3100")
+}else{
+plot_colorByDensity(pixels(msidata), TICs,  ylab = "", xlab = "", main="TIC per spectrum")
+}
 title(xlab="Spectra index", line=3)
 title(ylab = "Total ion chromatogram intensity", line=4)
 if (!is.null(levels(msidata\$annotation))){
 abline(v=abline_vector, lty = 3)}
 ## 10b) histogram
-hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="")
+hist((TICs), main="", las=1, xlab = "TIC per spectrum", ylab="")
 title(main= "TIC per spectrum", line=2)
 title(ylab="Frequency = # spectra", line=4)
-abline(v=median(log(TICs[TICs>0])), col="blue")
+abline(v=median(TICs[TICs>0]), col="blue")
 ## 10c) additional histogram to show annotation contributions
 if (!is.null(levels(msidata\$annotation))){
-df_10 = data.frame(log(TICs), msidata\$annotation)
+df_10 = data.frame((TICs), msidata\$annotation)
 colnames(df_10) = c("TICs", "annotation")
 hist_10 = ggplot(df_10, aes(x=TICs, fill=annotation)) +
 geom_histogram()+ theme_bw()+
 theme(text=element_text(family="ArialMT", face="bold", size=12))+
 theme(plot.title = element_text(hjust = 0.5))+
 theme(legend.position="bottom",legend.direction="vertical")+
 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 8))+
-labs(title="TIC per spectrum and annotation group", x="log(TIC per spectrum)", y = "Frequency = # spectra") +
+labs(title="TIC per spectrum and annotation group", x="TIC per spectrum", y = "Frequency = # spectra") +
 guides(fill=guide_legend(ncol=5,byrow=TRUE))+
-geom_vline(xintercept = median(log(TICs[TICs>0])), size = 1, colour = "black",linetype = "dashed")
+geom_vline(xintercept = median(TICs[TICs>0]), size = 1, colour = "black",linetype = "dashed")
 print(hist_10)}
 ################################## IV) properties over m/z ####################
 ############################################################################
 print("properties over m/z")
 plot_colorByDensity(mz(msidata),mzTIC,  main= "Sum of intensities per m/z", ylab ="")
 title(xlab="m/z", line=2.5)
 title(ylab="Intensity sum", line=4)
 ## 13b) histogram
-hist(log(mzTIC), main="", xlab = "", las=1, ylab="")
+hist(mzTIC, main="", xlab = "", las=1, ylab="")
 title(main="Sum of intensities per m/z", line=2, ylab="")
-title(xlab = "log (sum of intensities per m/z)")
+title(xlab = "sum of intensities per m/z")
 title(ylab = "Frequency", line=4)
-abline(v=median(log(mzTIC[mzTIC>0])), col="blue")
+abline(v=median(mzTIC[mzTIC>0]), col="blue")
 ################################## V) intensity plots ########################
 ############################################################################
 print("intensity plots")
 ########################## 14) Intensity distribution ######################
 title(ylab="Median spectrum intensity", line=4)
 if (!is.null(levels(msidata\$annotation))){
 abline(v=abline_vector, lty = 3)}
 ## 14b) histogram:
-hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1)
+hist(spectra(msidata)[], main="", xlab = "", ylab="", las=1)
-title(main="Log2-transformed intensities", line=2)
+title(main="Intensity histogram", line=2)
-title(xlab="log2 intensities")
+title(xlab="intensities")
 title(ylab="Frequency", line=4)
-abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)]), na.rm=TRUE), col="blue")
+abline(v=median(spectra(msidata)[(spectra(msidata)>0)], na.rm=TRUE), col="blue")
 ## 14c) histogram to show contribution of annotation groups
 if (!is.null(levels(msidata\$annotation))){
 df_13 = data.frame(matrix(,ncol=2, nrow=0))
 for (subsample in levels(msidata\$annotation)){
-log2_int_subsample = log2(spectra(msidata)[,msidata\$annotation==subsample])
+log2_int_subsample = spectra(msidata)[,msidata\$annotation==subsample]
 df_subsample = data.frame(as.numeric(log2_int_subsample))
 df_subsample\$annotation = subsample
 df_13 = rbind(df_13, df_subsample)}
 df_13\$annotation = as.factor(df_13\$annotation)
-colnames(df_13) = c("logint", "annotation")
+colnames(df_13) = c("int", "annotation")
-hist_13 = ggplot(df_13, aes(x=logint, fill=annotation)) +
+hist_13 = ggplot(df_13, aes(x=int, fill=annotation)) +
 geom_histogram()+ theme_bw()+
 theme(text=element_text(family="ArialMT", face="bold", size=12))+
-labs(title="Log2-transformed intensities per sample", x="log2 intensities", y = "Frequency") +
+labs(title="Intensities per sample", x="intensities", y = "Frequency") +
 theme(plot.title = element_text(hjust = 0.5))+
 theme(legend.position="bottom",legend.direction="vertical")+
 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 8))+
 guides(fill=guide_legend(ncol=5,byrow=TRUE))+
-geom_vline(xintercept = median(log2(spectra(msidata)[(spectra(msidata)>0)])), size = 1, colour = "black",linetype = "dashed")
+geom_vline(xintercept = median(spectra(msidata)[(spectra(msidata)>0)]), size = 1, colour = "black",linetype = "dashed")
 print(hist_13)
 ## 14d) boxplots to visualize in a different way the intensity distributions
 par(mfrow = c(1,1), cex.axis=1.3, cex.lab=1.3, mar=c(13.1,4.1,5.1,2.1))
 mean_matrix = matrix(,ncol=0, nrow = nrow(msidata))
 for (subsample in levels(msidata\$annotation)){
 mean_mz_sample = rowMeans(spectra(msidata)[,msidata\$annotation==subsample],na.rm=TRUE)
 mean_matrix = cbind(mean_matrix, mean_mz_sample)}
-boxplot(log2(mean_matrix), ylab = "log2 mean intensity per m/z", main="Mean m/z intensities per annotation group", xaxt = "n")
+boxplot(mean_matrix, ylab = "Mean intensity per m/z", main="Mean m/z intensities per annotation group", xaxt = "n")
 (axis(1, at = c(1:number_combined), labels=levels(msidata\$annotation), las=2))
 ## 14e) Heatmap of pearson correlation on mean intensities between annotation groups
 corr_matrix = mean_matrix
 corr_matrix[corr_matrix == 0] <- NA
 colnames(corr_matrix) = levels(msidata\$annotation)
-corr_matrix = cor(log2(corr_matrix), method= "pearson",use="complete.obs")
+## pearson correlation is only possible if there are at least 2 groups
+if (length(colnames)>1)
-heatmap.parameters <- list(corr_matrix,
+{
-show_rownames = T, show_colnames = T,
+corr_matrix = cor(corr_matrix, method= "pearson",use="complete.obs")
-main = "Pearson correlation on mean intensities")
-do.call("pheatmap", heatmap.parameters)
+heatmap.parameters <- list(corr_matrix,
+show_rownames = T, show_colnames = T,
+main = "Pearson correlation on mean intensities")
+do.call("pheatmap", heatmap.parameters)
+}
 }
 ################################## VI) Mass spectra and m/z accuracy ########################
 ############################################################################
 print("Mass spectra and m/z accuracy")
 ############################ 15) Mass spectra ##############################
-## find three random pixel to plot their spectra in the following plots:
+## replace any NA with 0, otherwise plot function will not work at all
+msidata_no_NA = msidata
+spectra(msidata_no_NA)[is.na(spectra(msidata_no_NA)[])] = 0
+## find three equal m/z ranges for the average mass spectra plots:
+third_mz_range = nrow(msidata_no_NA)/3
+par(mfrow = c(2, 2), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
+plot(msidata_no_NA, pixel = 1:ncol(msidata_no_NA), main= "Average spectrum")
+plot(msidata_no_NA[1:third_mz_range,], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum")
+plot(msidata_no_NA[third_mz_range:(2*third_mz_range),], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum")
+plot(msidata_no_NA[(2*third_mz_range):nrow(msidata_no_NA),], pixel = 1:ncol(msidata_no_NA), main= "Zoomed average spectrum")
+## plot 4 random mass spectra
+## find four random pixel to plot their spectra in the following plots:
 pixel1 = sample(pixelnumber,1)
 pixel2 = sample(pixelnumber,1)
 pixel3 = sample(pixelnumber,1)
+pixel4 = sample(pixelnumber,1)
-## replace any NA with 0, otherwise plot function will not work at all
-msidata_no_NA = msidata
-spectra(msidata_no_NA)[is.na(spectra(msidata_no_NA)[])] = 0
 par(mfrow = c(2, 2), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
-plot(msidata_no_NA, pixel = 1:ncol(msidata_no_NA), main= "Average spectrum")
 plot(msidata_no_NA, pixel = pixel1, main=paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel1,1:2])))
-plot(msidata_no_NA, pixel = pixel2, main= paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel2,1:2])))
+plot(msidata_no_NA, pixel = pixel2, main=paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel2,1:2])))
 plot(msidata_no_NA, pixel = pixel3, main= paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel3,1:2])))
+plot(msidata_no_NA, pixel = pixel4, main= paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel4,1:2])))
 ################### 16) Zoomed in mass spectra for calibrants ##############
 count = 1
 differencevector = numeric()
 plusminusvalues = rep($plusminus_ppm/1000000, length(inputcalibrantmasses)) * inputcalibrantmasses
 for (mass in 1:length(inputcalibrantmasses)){
 ### define the plot window with xmin and xmax
-minmasspixel = features(msidata_no_NA, mz=inputcalibrantmasses[mass]-0.5)
+minmasspixel1 = features(msidata_no_NA, mz=inputcalibrantmasses[mass]-0.5)
-maxmasspixel = features(msidata_no_NA, mz=inputcalibrantmasses[mass]+1.5)
+maxmasspixel1 = features(msidata_no_NA, mz=inputcalibrantmasses[mass]+1.5)
+minmasspixel2 = features(msidata_no_NA, mz=inputcalibrantmasses[mass]-0.25)
+maxmasspixel2 = features(msidata_no_NA, mz=inputcalibrantmasses[mass]+0.5)
+minmasspixel3 = features(msidata_no_NA, mz=inputcalibrantmasses[mass]-3)
+maxmasspixel3 = features(msidata_no_NA, mz=inputcalibrantmasses[mass]+3)
 ### find m/z with the highest mean intensity in m/z range (red line in plot 16) and calculate ppm difference for plot 17
 filtered_data = msidata_no_NA[mz(msidata_no_NA) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata_no_NA) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
 if (nrow(filtered_data) > 0 & sum(spectra(filtered_data)) > 0){
 differencevector2[mass] = round(ppmdifference2, digits=2)
 ## plotting of 4 spectra in one page
 par(mfrow = c(2, 2), oma=c(0,0,2,0))
 ## average plot
-plot(msidata_no_NA[minmasspixel:maxmasspixel,], pixel = 1:length(pixelnumber), main= "Average spectrum")
+plot(msidata_no_NA[minmasspixel1:maxmasspixel1,], pixel = 1:length(pixelnumber), main= "Average spectrum")
 abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,5,3))
 abline(v=c(maxvalue), col="red", lty=2)
 abline(v=c(mzvalue), col="green2", lty=4)
 ## average plot including points per data point
-plot(msidata_no_NA[minmasspixel:maxmasspixel,], pixel = 1:length(pixelnumber), main="Average spectrum with data points")
+plot(msidata_no_NA[minmasspixel1:maxmasspixel1,], pixel = 1:length(pixelnumber), main="Average spectrum with data points")
-points(mz(msidata_no_NA[minmasspixel:maxmasspixel,]), rowMeans(spectra(msidata_no_NA)[minmasspixel:maxmasspixel,]), col="blue", pch=20)
+points(mz(msidata_no_NA[minmasspixel1:maxmasspixel1,]), rowMeans(spectra(msidata_no_NA)[minmasspixel1:maxmasspixel1,]), col="blue", pch=20)
-## plot of a random pixel (1)
+## plot of third average plot
-plot(msidata_no_NA[minmasspixel:maxmasspixel,], pixel = pixel2, main= paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel2,1:2])))
+plot(msidata_no_NA[minmasspixel2:maxmasspixel2,], pixel = 1:length(pixelnumber), main= "Average spectrum")
 abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,5,3))
 abline(v=c(maxvalue), col="red", lty=2)
 abline(v=c(mzvalue), col="green2", lty=4)
-## plot of a random pixel (2)
+## plot of fourth average plot
-plot(msidata_no_NA[minmasspixel:maxmasspixel,], pixel = pixel3, main= paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel3,1:2])))
+plot(msidata_no_NA[minmasspixel3:maxmasspixel3,], pixel = 1:length(pixelnumber), main= "Average spectrum")
 abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,5,3))
 abline(v=c(maxvalue), col="red", lty=2)
 abline(v=c(mzvalue), col="green2", lty=4)
 title(paste0("theor. m/z: ", round(inputcalibrants[count,1], digits=4)), col.main="blue", outer=TRUE, line=0, adj=0.074)
 title(paste0("most abundant m/z: ", round(maxvalue, digits=4)), col.main="red", outer=TRUE, line=0, adj=0.49)
 if (!is.null(levels(msidata\$annotation))){
 if (number_combined < 10){
 key_zoomed = TRUE
 }else{key_zoomed = FALSE}
 par(mfrow = c(1, 1))
-plot(msidata_no_NA[minmasspixel:maxmasspixel,], pixel=1:ncol(msidata_no_NA),main="Average spectrum per annotation group",
+plot(msidata_no_NA[minmasspixel1:maxmasspixel1,], pixel=1:ncol(msidata_no_NA),main="Average spectrum per annotation group",
 pixel.groups=msidata\$annotation, key=key_zoomed, col=hue_pal()(number_combined),superpose=TRUE)
 abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="black", lty=c(3,1,3))
 }
 count=count+1
 }
+## remove msidata_no_NA to clean up RAM space
+rm(msidata_no_NA)
+gc()
 ######### 17) ppm difference input calibrant m/z and m/z with max intensity in given m/z range#########
 par(mfrow = c(1,1))
 ### plot the ppm difference calculated above: theor. m/z value to highest m/z value:
 print(diff_plot2)
 #################### 19) ppm difference over pixels #####################
 par(mfrow = c(1,1))
-mycolours = c("darkgrey", "darkblue", "blue", "green" , "red", "orange", "yellow", "magenta", "olivedrab1", "lightseagreen")
 count = 1
 ppm_df = as.data.frame(matrix(,ncol=0, nrow = ncol(msidata)))
 for (calibrant in inputcalibrantmasses){
 ### find m/z with the highest mean intensity in m/z range, if no m/z in the range, ppm differences for this calibrant will be NA
 filtered_data = msidata[mz(msidata) >= calibrant-plusminusvalues[count] & mz(msidata) <= calibrant+plusminusvalues[count],]
 if (nrow(filtered_data) > 0){
-### filtered for m/z range, find max peak in each spectrum (pixel)(
+### filtered for m/z range, find max peak in each spectrum
 ppm_vector = numeric()
 for (pixel_count in 1:ncol(filtered_data)){
+## for each spectrum (pixel_count) find the m/z that has the highest intensity
 mz_max = mz(filtered_data)[which.max(spectra(filtered_data)[,pixel_count])]
 mzdiff = mz_max - calibrant
 ppmdiff = mzdiff/calibrant*1000000
 ### if maximum intensity in m/z range was 0 set ppm diff to NA (not shown in plot)
-if (max(spectra(filtered_data)[,pixel_count]) == 0){
+if (max(spectra(filtered_data)[,pixel_count]) == 0 || is.na(max(spectra(filtered_data)[,pixel_count]))){
 ppmdiff = NA}
 ppm_vector[pixel_count] = ppmdiff}
 }else{
 ppm_vector = rep(NA, ncol(msidata))
 for (each_cal in 1:ncol(ppm_df)){
 lines(ppm_df[,each_cal], col=mycolours[each_cal], type="p")}
 legend("topright", inset=c(-0.2,0), xpd = TRUE, bty="n", cex=0.8,legend=inputcalibrantmasses, col=mycolours[1:ncol(ppm_df)],lty=1)
 if (!is.null(levels(msidata\$annotation))){
 abline(v=abline_vector, lty = 3)}}
+### make x-y-images for mz accuracy
+ppm_dataframe = cbind(coord(msidata)[,1:2], ppm_df)
+for (each_cal in 1:ncol(ppm_df)){
+tmp_ppm = ppm_dataframe[,c(1,2,each_cal+2)]
+tmp_ppm[,3] = as.numeric(tmp_ppm[,3])
+colnames(tmp_ppm) = c("x","y", "ppm_each_cal")
+print(ggplot(tmp_ppm, aes(x=x, y=y, fill=ppm_each_cal))+
+geom_tile() + coord_fixed() +
+ggtitle(paste0("m/z accuracy for ",inputcalibrants[,2][each_cal]))+
+theme_bw() +
+theme(plot.title = element_text(hjust = 0.5))+
+theme(text=element_text(family="ArialMT", face="bold", size=12))+
+scale_fill_gradient2(low = "navy", mid = "white", high = "red", midpoint = 0 ,space = "Lab", na.value = "black", name = "ppm\nerror"))}
 }else{print("plot 16+17+18+19) The inputcalibrant m/z were not provided or outside the m/z range")}
 }else{
 print("inputfile has no intensities > 0")
 }
 <param name="plusminus_ppm" value="200" type="float" label="ppm range" help="Will be added in both directions to input calibrant m/z"/>
 <param name="do_pca" type="boolean" label="PCA with 2 components"/>
 <repeat name="calibrantratio" title="Plot fold change of two m/z" min="0" max="10">
 <param name="mass1" value="1111" type="float" label="M/z 1" help="First m/z"/>
 <param name="mass2" value="2222" type="float" label="M/z 2" help="Second m/z"/>
-<param name="distance" value="0.25" type="float" label="M/z range" help="Plusminus m/z window added to input m/z. In both m/z ranges the maximum intensity is used to calculate the fold change"/>
+<param name="distance" value="200" type="float" label="ppm range" help="Will be added in both directions to input calibrant m/z and intensities will be averaged in this range."/>
 <param name="filenameratioplot" type="text" optional="true" label="Title" help="Optional title for fold change plot.">
 <sanitizer invalid_char="">
 <valid initial="string.ascii_letters,string.digits">
 <add value="_" />
 </valid>
 </sanitizer>
 </param>
 </repeat>
-<param name="pixel_output" type="boolean" label="Tabular output with spectra information"/>
 </inputs>
 <outputs>
-<data format="pdf" name="QC_report" from_work_dir="qualitycontrol.pdf" label = "${tool.name} on ${on_string}"/>
+<data format="pdf" name="QC_report" from_work_dir="qualitycontrol.pdf" label = "${tool.name} on ${on_string}: results"/>
 </outputs>
 <tests>
 <test>
-<expand macro="infile_imzml"/>
+<param name="infile" value="" ftype="imzml">
+<composite_data value="Example_Processed.imzML"/>
+<composite_data value="Example_Processed.ibd"/>
+</param>
 <conditional name="processed_cond">
 <param name="processed_file" value="processed"/>
 <param name="accuracy" value="200"/>
 <param name="units" value="ppm"/>
 </conditional>
 <param name="filename" value="Testfile_imzml"/>
 <param name="do_pca" value="True"/>
 <repeat name="calibrantratio">
 <param name="mass1" value="328.9"/>
 <param name="mass2" value="398.8"/>
-<param name="distance" value="0.25"/>
+<param name="distance" value="500"/>
 <param name="filenameratioplot" value = "Ratio of mass1 (328.9) / mass2 (398.8)"/>
 </repeat>
 <output name="QC_report" file="QC_imzml.pdf" compare="sim_size"/>
 </test>
 This tool uses Cardinal to read files and create a quality control report with descriptive plots for mass spectrometry imaging data.
 @MSIDATA_INPUT_DESCRIPTION@
 - Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates, which will be automatically removed before the tool's analysis starts.
 @SPECTRA_TABULAR_INPUT_DESCRIPTION@
+- at least two different annotations should be in the annotation column
 @MZ_2COLS_TABULAR_INPUT_DESCRIPTION@
+- maximum of 9 m/z values per run are supported
+- names should be unique
 **Options**
 - m/z of interest (e.g. internal calibrants) and the ppm range are used for m/z heatmaps (x-y grid), heatmap of number of calibrants per spectrum (x-y grid), zoomed in mass spectra, m/z accuracy plots
 - Optional fold change plot: draws a heatmap (x-y grid) for the fold change of two m/z (log2(intensity ratio))
 - (annot) Spatial orientation of annotated pixels: All pixels of one annotation group have the same colour.
 - Pixel order: Shows the order of the pixels in the provided file. Depending on the instrument this can represent the acquisition order. If annotation file is provided pixels are ordered according to annotation groups.
 - (cal) Number of calibrants per pixel: In every spectrum the calibrant m/z window (calibrant m/z plusminus 'ppm range') is searched for peaks (intensity > 0). Calibrants are considered present in a spectrum when they have at least one peak in their m/z window.
 - (FC) Control of fold change plot: For both input m/z a zoomed in average spectrum is drawn with the input m/z as blue dashed line, the m/z range as blue dotted lines and the maximum intensity in the m/z window with a red line.
-- (FC) Fold change image: For each spectrum the intensities of the two optimal m/z features (red lines in control plots) are divided and log2 transformed to obtain the fold change, which is then plotted as a heatmap.
+- (FC) Fold change image: For each input m/z the average intensity within the given ppm range is calculated, then the log2 fold change of both average intensities is taken and plotted as heatmap.
 - (cal) Intensity heatmaps for the m/z value that is closest to the calibrant m/z. The intensities are averaged within the calibrant m/z window (ppm range).
 - Number of peaks per spectrum: For each spectrum the number of m/z values with intensity > 0 is calculated and plotted as heatmap.
 - Total ion chromatogram: For each spectrum all intensities are summed up to obtain the TIC which is plotted as heatmap.
 - Median intensity: For each spectrum the median intensity is plotted as heatmap.
+- Maximum intensity: For each spectrum the maximum intensity is plotted as heatmap.
 - Most abundant m/z in each spectrum: For each spectrum the m/z value with the highest intensity is plotted.
-- PCA for two components: Result of a principal component analysis (PCA) for two components is given. The loading plot depicts the contribution of each m/z value and the x-y image represents the differences between the pixels.
+- PCA for two components: Result of a principal component analysis (PCA) for two components is given. The loading plot depicts the contribution of each m/z value and the x-y image represents the differences between the pixels. Principal components 1 and 2 are also plotted as x-y images.
 **Properties over spectra/pixels**
 - Number of peaks per spectrum: Scatter plot and histogram showing the number of intensities > 0 for each spectrum. If annotation tabular file is provided, the pixels are sorted according to annotation groups and the dotted lines in the scatter plot separate spectra of different annotation groups.
 - (annot) Number of peaks per spectrum and annotation group: Same histogram as in plot before but with colours to show the contribution of each pixel annotation group.
 - TIC per spectrum: Scatter plot and histogram showing the sum of all intensities per spectrum (TIC). Dotted lines in the scatter plot separate spectra of different annotation groups.
-- (annot) TIC per spectrum and annotation group: Same histogram as in plot before but with colours to show the contribution of each pixel annotation group.
+- (annot) TIC per spectrum and annotation group: Same histogram as in plot before but with colours to show the contribution of each pixel annotation group. Only the length of each coloured bar is important, not its height from zero, because the bars are stacked and not overlaid.
 **Properties over m/z features**
 - Histogram of m/z values: Histogram of all m/z values (complete m/z axis)
 - Number of peaks per m/z: Scatter plot and histogram giving the number of intensities > 0 for each m/z.
 - Sum of intensities per m/z: Scatter plot and histogram of the sum of all intensities per m/z.
 **Intensity plots**
 - Median intensity per spectrum: Scatter plot in which each point represents the median intensity for one spectrum. Dotted lines in the scatter plot separate spectra of different annotation groups.
-- Log2-transformed intensities: Histogram of log2-transformed intensities.
+- Histogram of intensities.
-- (annot) log2-transformed intensities per annotation group: Same histogram as before but with colours to show the contribution of each pixel annotation group.
+- (annot) Intensities per annotation group: Same histogram as before but with colours to show the contribution of each pixel annotation group.
 - (annot) Mean intensities per m/z and annotation group: For all pixels of an annotation group the mean intensity for each m/z is calculated and shown as boxplot.
-- (annot) Pearson correlation between annotation groups based on mean intensities and shown as heatmap.
+- (annot) Pearson correlation between annotation groups (needs at least 2 groups) based on mean intensities and shown as heatmap.
 **Mass spectra and m/z accuracy**
-- Mass spectra over the full m/z range: First plot shows the average intensities over all spectra. The other three mass spectra are from random individual pixels (spectra).
+- Average mass spectra: First plot shows the average spectrum over the full m/z range, the other three plots zoom into the m/z axis.
-- (cal) For each calibrant four zoomed in mass spectrum are drawn: The first two mass spectra show the average intensities over all spectra and the other two specra are from random individual pixels. The theoretical calibrant m/z (taken from the input file) is represented by the dashed blue line. The dotted blue lines show the given ppm range. The green line is the m/z value that is closest to the theoretical calibrant and the red line is the m/z with the highest average intensity in the m/z window. In the second average spectra plot each blue plot indicates one data point.
+- (cal) For each calibrant four zoomed-in average mass spectra are drawn at different zoom levels. The theoretical calibrant m/z (taken from the input file) is represented by the dashed blue line. The dotted blue lines show the given ppm range. The green line is the m/z value that is closest to the theoretical calibrant and the red line is the m/z with the highest average intensity in the m/z window. In the second spectrum each blue dot indicates one data point.
 - (annot) Average spectrum per annotation group: For each calibrant a zoomed in mass spectrum is plotted this time with the average intensities for each annotation group separately.
 - (cal) Difference m/z with max. average intensity vs. theor. calibrant m/z: The difference in ppm between the m/z with the highest average intensity and the theoretical m/z is plotted for each calibrant. This corresponds to the difference between the dashed blue line and the red line in the zoomed in mass spectra.
 - (cal) Difference closest measured m/z vs. theor. calibrant m/z: The difference in ppm between the closest measured m/z value and the theoretical m/z value is plotted for each calibrant. This corresponds to the difference between the dashed blue line and the green line in the zoomed in mass spectra.
 - (cal) Difference m/z with max. average intensity vs. theor. m/z (per spectrum): For each spectrum the ppm difference between the m/z with the highest intensity in that spectrum (within the calibrant m/z window) and the theoretical m/z is plotted. The calibrants have different plotting colours. Dashed lines separate spectra of different annotation groups.
+- (cal) The same per-spectrum m/z accuracy in ppm is also plotted for each calibrant as an x-y image.
 ]]>
 </help>
 <expand macro="citations"/>

Mercurial > repos > galaxyp > cardinal_quality_report

comparison quality_report.xml @ 2:d4803c1e5e19 draft