Mercurial > repos > galaxyp > msi_qualitycontrol
changeset 6:5c63fe03ed9e draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_qualitycontrol commit 06c2b45d8644b1d7fc01622a5c59dcbf8886d0f1
author | galaxyp |
---|---|
date | Mon, 23 Apr 2018 17:19:35 -0400 |
parents | ac786240ef07 |
children | b86a66dd1a16 |
files | msi_qualitycontrol.xml test-data/LM8_file16output.pdf test-data/Testfile_qualitycontrol_analyze75.pdf test-data/Testfile_qualitycontrol_imzml.pdf test-data/Testfile_qualitycontrol_rdata.pdf |
diffstat | 5 files changed, 67 insertions(+), 32 deletions(-) [+] |
line wrap: on
line diff
--- a/msi_qualitycontrol.xml Sun Mar 11 10:38:43 2018 -0400 +++ b/msi_qualitycontrol.xml Mon Apr 23 17:19:35 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="mass_spectrometry_imaging_qc" name="MSI Qualitycontrol" version="1.7.0.4"> +<tool id="mass_spectrometry_imaging_qc" name="MSI Qualitycontrol" version="1.7.0.5"> <description> mass spectrometry imaging QC </description> @@ -243,11 +243,11 @@ ## 1) Acquisition image pixelnumber = 1:pixelcount - pixelxyarray=cbind(coord(msidata),pixelnumber) + pixelxyarray=cbind(coord(msidata)[,1:2],pixelnumber) print(ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber)) + geom_tile() + coord_fixed() - + ggtitle("1) Order of Acquisition") + + ggtitle("Order of Acquisition") +theme_bw() + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "Acq")) @@ -278,12 +278,12 @@ } countvector= as.factor(colSums(pixelmatrix>0)) - countdf= cbind(coord(msidata), countvector) + countdf= cbind(coord(msidata)[,1:2], countvector) mycolours = c("black","grey", "darkblue", "blue", "green" , "red", "yellow", "magenta", "olivedrab1", "lightseagreen") print(ggplot(countdf, aes(x=x, y=y, fill=countvector)) + geom_tile() + coord_fixed() - + ggtitle("2) Number of calibrants per pixel") + + ggtitle("Number of calibrants per pixel") + theme_bw() + theme(text=element_text(family="ArialMT", face="bold", size=12)) + scale_fill_manual(values = mycolours[1:length(countvector)], @@ -352,7 +352,7 @@ mass2vector = spectra(msidata)[features(msidata, mz = maxmass2),] foldchange = log2(mass1vector/mass2vector) - ratiomatrix = cbind(foldchange, coord(msidata)) + ratiomatrix = cbind(foldchange, coord(msidata)[,1:2]) print(ggplot(ratiomatrix, aes(x=x, y=y, fill=foldchange), colour=colo) + geom_tile() + coord_fixed() @@ -371,7 +371,7 @@ { for (mass in 1:length(inputmasses)) { image(msidata, mz=inputmasses[mass], plusminus=$plusminus_dalton, - main= paste0("3", LETTERS[mass], ") ", inputnames[mass], " (", round(inputmasses[mass], digits = 2)," ± ", $plusminus_dalton, " Da)"), + main= paste0(inputnames[mass], " (", round(inputmasses[mass], digits = 2)," ± ", $plusminus_dalton, " Da)"), contrast.enhance = "histogram", ylim=c(maximumy+2, 0)) } } else {print("3) The inputpeptide masses were not provided or outside the mass range")} @@ -380,23 +380,23 @@ ## 4) Number of peaks per pixel - image peaksperpixel = colSums(spectra(msidata)[]> 0) - peakscoordarray=cbind(coord(msidata), peaksperpixel) + peakscoordarray=cbind(coord(msidata)[,1:2], peaksperpixel) print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel), colour=colo) + geom_tile() + coord_fixed() - + ggtitle("4) Number of peaks per pixel") + + ggtitle("Number of peaks per pixel") + theme_bw() + theme(text=element_text(family="ArialMT", face="bold", size=12)) + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") ,space = "Lab", na.value = "black", name = "# peaks")) ## 5) TIC image - TICcoordarray=cbind(coord(msidata), TICs) + TICcoordarray=cbind(coord(msidata)[,1:2], TICs) colo = colorRampPalette( c("blue", "cyan", "green", "yellow","red")) print(ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs), colour=colo) + geom_tile() + coord_fixed() - + ggtitle("5) Total Ion Chromatogram") + + ggtitle("Total Ion Chromatogram") + theme_bw() + theme(text=element_text(family="ArialMT", face="bold", size=12)) + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") @@ -405,12 +405,12 @@ ## 6) Most abundant mass image highestmz = apply(spectra(msidata)[],2,which.max) - highestmz_matrix = cbind(coord(msidata),mz(msidata)[highestmz]) + highestmz_matrix = cbind(coord(msidata)[,1:2],mz(msidata)[highestmz]) colnames(highestmz_matrix)[3] = "highestmzinDa" print(ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa)) + geom_tile() + coord_fixed() - + ggtitle("6) Most abundant m/z in each pixel") + + ggtitle("Most abundant m/z in each pixel") + theme_bw() + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z", labels = as.character(pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)]), @@ -427,7 +427,7 @@ ## 7) pca image for two components pca = PCA(msidata, ncomp=2) par(mfrow = c(2,1)) - plot(pca, col=c("black", "darkgrey"), main="7) PCA for two components") + plot(pca, col=c("black", "darkgrey"), main="PCA for two components") image(pca, col=c("black", "white"),ylim=c(maximumy+2, 0), strip=FALSE) @@ -437,26 +437,26 @@ par(mfrow = c(2,1), mar=c(5,6,4,2)) ## 8a) number of peaks per spectrum - scatterplot - plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="8a) Number of peaks per spectrum") + plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="Number of peaks per spectrum") title(xlab="Spectra index \n (= Acquisition time)", line=3) title(ylab="Number of peaks", line=4) ## 8b) number of peaks per spectrum - histogram hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="") - title(main="8b) Number of peaks per spectrum", line=2) + title(main="Number of peaks per spectrum", line=2) title(ylab="Frequency = # spectra", line=4) abline(v=median(peaksperpixel), col="blue") ## 9a) TIC per spectrum - density scatterplot zero=0 par(mfrow = c(2,1), mar=c(5,6,4,2)) - plot_colorByDensity(pixels(msidata), TICs, ylab = "", xlab = "", main="9a) TIC per pixel") + plot_colorByDensity(pixels(msidata), TICs, ylab = "", xlab = "", main="TIC per spectrum") title(xlab="Spectra index \n (= Acquisition time)", line=3) title(ylab = "Total ion chromatogram intensity", line=4) ## 9b) TIC per spectrum - histogram hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="") - title(main= "9b) TIC per spectrum", line=2) + title(main= "TIC per spectrum", line=2) title(ylab="Frequency = # spectra", line=4) abline(v=median(log(TICs[TICs>0])), col="blue") @@ -469,18 +469,18 @@ peakspermz = rowSums(spectra(msidata)[] > 0 ) par(mfrow = c(2,1), mar=c(5,6,4,4.5)) - ## 11a) Number of peaks per mz - scatterplot - plot_colorByDensity(mz(msidata),peakspermz, main= "11a) Number of peaks for each mz", ylab ="") + ## Number of peaks per mz - scatterplot + plot_colorByDensity(mz(msidata),peakspermz, main= "Number of peaks per mz", ylab ="") title(xlab="mz in Dalton", line=2.5) title(ylab = "Number of peaks", line=4) axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1) mtext("Coverage of spectra [%]", 4, line=3, adj=1) # make plot smaller to fit axis and labels, add second y axis with % - ## 11b) Number of peaks per mz - histogram - hist(peakspermz, main="", las=1, ylab="") + ## Number of peaks per mz - histogram + hist(peakspermz, main="", las=1, ylab="", xlab="") title(ylab = "Frequency", line=4) - title(main="11b) Number of peaks per mz", xlab = "Number of peaks per mz", line=2) + title(main="Number of peaks per mz", xlab = "Number of peaks per mz", line=2) abline(v=median(peakspermz), col="blue") @@ -491,12 +491,12 @@ par(mfrow = c(2,1), mar=c(5,6,4,2)) # 12a) sum of intensities per mz - scatterplot - plot_colorByDensity(mz(msidata),mzTIC, main= "12a) Sum of all peak intensities for each mz", ylab ="") + plot_colorByDensity(mz(msidata),mzTIC, main= "Sum of intensities per mz", ylab ="") title(xlab="mz in Dalton", line=2.5) title(ylab="Intensity sum", line=4) # 12b) sum of intensities per mz - histogram hist(log(mzTIC), main="", xlab = "", las=1, ylab="") - title(main="12b) Sum of intensities per mz", line=2, ylab="") + title(main="Sum of intensities per mz", line=2, ylab="") title(xlab = "log (sum of intensities per mz)") title(ylab = "Frequency", line=4) abline(v=median(log(mzTIC[mzTIC>0])), col="blue") @@ -510,23 +510,28 @@ ## 13a) Intensity histogram: hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1) - title(main="13a) Log2-transformed intensities", line=2) + title(main="Log2-transformed intensities", line=2) title(xlab="log2 intensities") title(ylab="Frequency", line=4) abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)])), col="blue") ## 13b) Median intensity over spectra medianint_spectra = apply(spectra(msidata), 2, median) - plot(medianint_spectra, main="13b) Median intensity per spectrum",las=1, xlab="Spectra index \n (= Acquisition time)", ylab="") + plot(medianint_spectra, main="Median intensity per spectrum",las=1, xlab="Spectra index \n (= Acquisition time)", ylab="") title(ylab="Median spectrum intensity", line=4) + ## 13c) Histogram on mz values + par(mfrow = c(1, 1)) + hist(mz(msidata), xlab = "mz in Dalton", main="Histogram of mz values") + + ## 14) Mass spectra par(mfrow = c(2, 2)) plot(msidata, pixel = 1:length(pixelnumber), main= "Average spectrum") plot(msidata, pixel =round(length(pixelnumber)/2, digits=0), main="Spectrum in middle of acquisition") - plot(msidata, pixel = highestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,]))) - plot(msidata, pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,]))) + plot(msidata, pixel = highestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,1:2]))) + plot(msidata, pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,1:2]))) ## 15) Zoomed in mass spectra for calibrants plusminusvalue = $plusminus_dalton @@ -543,9 +548,9 @@ abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3)) plot(msidata[minmasspixel:maxmasspixel,], pixel =round(length(pixelnumber)/2, digits=0), main="pixel in middle of acquisition") abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3)) - plot(msidata[minmasspixel:maxmasspixel,], pixel = highestmz_pixel,main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,]))) + plot(msidata[minmasspixel:maxmasspixel,], pixel = highestmz_pixel,main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,1:2]))) abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3)) - plot(msidata[minmasspixel:maxmasspixel,], pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,]))) + plot(msidata[minmasspixel:maxmasspixel,], pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,1:2]))) abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3)) title(paste0(inputcalibrants[x,1]), outer=TRUE) x=x+1 @@ -553,7 +558,37 @@ }else{print("15) The inputcalibrant masses were not provided or outside the mass range")} + ## 16) ppm accuracy measured vs. theoretical calibrant mass + + if (length(inputcalibrantmasses) != 0) + { + par(mfrow = c(1, 1)) + + differencevector = vector() + + for (mass in 1:length(inputcalibrantmasses)) + {mznumber = features(msidata, mz = inputcalibrantmasses[mass]) ### this gives the featurenumber which is closest to given mz + mzvalue = mz(msidata)[mznumber] ### gives the mz in Da which is closest to the given mz (using the featurenumber) + mzdifference = inputcalibrantmasses[mass] - mzvalue ### difference in Da: theoretical value - closest mz value + ppmdifference = mzdifference/inputcalibrantmasses[mass]*1000000 ### calculate ppm for accuracy measurement + differencevector[mass] = ppmdifference } + differencevector = round(differencevector, digits=2) + + ### plot the ppm difference theor. mz value to closest mz value: + + calibrant_names = as.character(calibrant_list[,2]) + diff_df = data.frame(differencevector, calibrant_names) + diff_plot<-ggplot(data=diff_df, aes(x=calibrant_names, y=differencevector)) + geom_col() + theme_minimal() + + labs(title="Theoretical calibrant mz vs. closest measured mz", x="calibrants", y = "Difference in ppm")+ + geom_text(aes(label=differencevector), vjust=-0.3, size=3.5, col="blue") + + print(diff_plot) + + }else{print("16) The inputcalibrant masses were not provided or outside the mass range")} + + dev.off() + }else{ print("inputfile has no intensities > 0") dev.off() @@ -579,7 +614,7 @@ </repeat> </inputs> <outputs> - <data format="pdf" name="plots" from_work_dir="qualitycontrol.pdf" label = "${tool.name} on $infile.display_name"/> + <data format="pdf" name="plots" from_work_dir="qualitycontrol.pdf" label = "${tool.name} ${on_string}"/> </outputs> <tests>