# HG changeset patch # User galaxyp # Date 1509444003 14400 # Node ID 845073d506a8d5e50ad6390ec6e866b513bf4817 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/msi_qualitycontrol commit fa798afa023eea1cb183c14d0242721b2c696c21 diff -r 000000000000 -r 845073d506a8 msi_qualitycontrol.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/msi_qualitycontrol.xml Tue Oct 31 06:00:03 2017 -0400 @@ -0,0 +1,482 @@ + + + mass spectrometry imaging QC + + + bioconductor-cardinal + r-ggplot2 + r-rcolorbrewer + r-gridextra + r-kernsmooth + + + + + + 0 +npeaks= sum(spectra(msidata)[]>0) +## Spectra multiplied with mz (potential number of peaks) +numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) +## Percentage of intensities > 0 +percpeaks = round(npeaks/numpeaks*100, digits=2) +## Number of empty TICs +TICs = colSums(spectra(msidata)[]) +NumemptyTIC = sum(TICs == 0) + +## Processing informations +processinginfo = processingData(msidata) +centroidedinfo = processinginfo@centroided # TRUE or FALSE + +## if TRUE write processinginfo if no write FALSE + +## normalization +if (length(processinginfo@normalization) == 0) { + normalizationinfo='FALSE' +} else { + normalizationinfo=processinginfo@normalization +} +## smoothing +if (length(processinginfo@smoothing) == 0) { + smoothinginfo='FALSE' +} else { + smoothinginfo=processinginfo@smoothing +} +## baseline +if (length(processinginfo@baselineReduction) == 0) { + baselinereductioninfo='FALSE' +} else { + baselinereductioninfo=processinginfo@baselineReduction +} +## peak picking +if (length(processinginfo@peakPicking) == 0) { + peakpickinginfo='FALSE' +} else { + peakpickinginfo=processinginfo@peakPicking +} + + +## calculate how many input peptide masses are valid: +inputpeptides = input_list[input_list[,1]>minmz & input_list[,1] 0", + "Number of zero TICs", + "Preprocessing", + "Normalization", + "Smoothing", + "Baseline reduction", + "Peak picking", + "Centroided", + "# valid peptidemasses") + +values = c(paste0(maxfeatures), + paste0(minmz, " - ", maxmz), + paste0(pixelcount), + paste0(minimumx, " - ", maximumx), + paste0(minimumy, " - ", maximumy), + paste0(minint, " - ", maxint), + paste0(medint), + paste0(percpeaks, " %"), + paste0(NumemptyTIC), + paste0(" "), + paste0(normalizationinfo), + paste0(smoothinginfo), + paste0(baselinereductioninfo), + paste0(peakpickinginfo), + paste0(centroidedinfo), + paste0(length(inputmasses))) + + +property_df = data.frame(properties, values) + + +## Variables for plots +xrange = 1 +yrange = 1 +maxx = max(coord(msidata)[,1])+xrange +minx = min(coord(msidata)[,1])-xrange +maxy = max(coord(msidata)[,2])+yrange +miny = min(coord(msidata)[,2])-yrange + + +####################################### Preparation of images ######################### + +## Acquisitionorder + +pixelnumber = 1:pixelcount +pixelxyarray=cbind(coord(msidata),pixelnumber) + + +## Number of peaks per pixel +peaksperpixel = colSums(spectra(msidata)[]> 0) +peakscoordarray=cbind(coord(msidata), peaksperpixel) + +## Most abundant mz + +highestmz = apply(spectra(msidata)[],2,which.max) +highestmz_matrix = cbind(coord(msidata),mz(msidata)[highestmz]) +colnames(highestmz_matrix)[3] = "highestmzinDa" + +###################################### Preparation of plots ############################ + +## function without xaxt for plots with automatic x axis +plot_colorByDensity = function(x1,x2, + ylim=c(min(x2),max(x2)), + xlim=c(min(x1),max(x1)), + xlab="",ylab="",main="") { + + df <- data.frame(x1,x2) + x <- densCols(x1,x2, colramp=colorRampPalette(c("black", "white"))) + df\$dens <- col2rgb(x)[1,] + 1L + cols <- colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256) + df\$col <- cols[df\$dens] + plot(x2~x1, data=df[order(df\$dens),], + ylim=ylim,xlim=xlim,pch=20,col=col, + cex=1,xlab=xlab,ylab=ylab,las=1, + main=main) +} + +## Number of peaks per mz - number across all pixel +peakspermz = rowSums(spectra(msidata)[] > 0 ) + +## Sum of all intensities for each mz (like TIC, but for mz instead of pixel) +mzTIC = rowSums(spectra(msidata)[]) # calculate intensity sum for each mz + + + +######################################## PDF ############################################# +########################################################################################## +########################################################################################## + + +pdf("qualitycontrol.pdf", fonts = "Times", pointsize = 12) +plot(0,type='n',axes=FALSE,ann=FALSE) +#if not $filename: + #set $filename = $infile.display_name +#end if +title(main=paste("Quality control of MSI data\n\n", "Filename:", "$filename")) + +############################# I) numbers #################################### +############################################################################# +grid.table(property_df, rows= NULL) + +############################# II) ion images ################################# +############################################################################## + +## 1) Acquisition image +(ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber)) + +scale_y_reverse() + geom_tile() + coord_fixed() + + ggtitle("1) Order of Acquisition") + +theme_bw() + + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), + space = "Lab", na.value = "black", name = "Acq")) + +## 2) Calibrant images: + + + +if (length(inputmasses) != 0) +{ for (mass in 1:length(inputmasses)) + + { + image(msidata, mz=inputmasses[mass], plusminus=$plusminusinDalton, + main= paste0("2",LETTERS[mass], ") ", inputnames[mass], " (", round(inputmasses[mass], digits = 2), " Da)"), + contrast.enhance = "histogram") + } +} else {print("The inputpeptide masses were outside the mass range")} + +## 3) Number of peaks per pixel - image + +(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel), colour=colo) + +scale_y_reverse(lim=c(maxy,miny)) + geom_tile() + coord_fixed() + + ggtitle("3) Number of peaks per pixel") + + theme_bw() + + theme(text=element_text(family="ArialMT", face="bold", size=12)) + + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") + ,space = "Lab", na.value = "black", name = "# peaks")) + + +## 4) TIC image +TICcoordarray=cbind(coord(msidata), TICs) +colo <- colorRampPalette( +c('blue', 'cyan', 'green', 'yellow','red')) +(ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs), colour=colo) + +scale_y_reverse(lim=c(maxy,miny)) + geom_tile() + coord_fixed() + + ggtitle("4) Total Ion Chromatogram") + + theme_bw() + + theme(text=element_text(family="ArialMT", face="bold", size=12)) + + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") + ,space = "Lab", na.value = "black", name = "TIC")) + +## 5) Most abundant mass image + +(ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa)) ++scale_y_reverse(lim=c(maxy,miny)) + geom_tile() + coord_fixed() ++ ggtitle("5) Most abundant m/z in each pixel") ++ theme_bw() ++ scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z", + labels = as.character(pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)]), + breaks = pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)], limits=c(min(highestmz_matrix\$highestmzinDa), max(highestmz_matrix\$highestmzinDa))) ++ theme(text=element_text(family="ArialMT", face="bold", size=12))) + +## which mz are highest +highestmz_peptides = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[1]) +highestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == highestmz_peptides)[1] + +secondhighestmz = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[2]) +secondhighestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == secondhighestmz)[1] + + + +## 6) pca image for two components +pca <- PCA(msidata, ncomp=2) +par(mfrow = c(2,1)) +plot(pca, col=c("black", "darkgrey"), main="6) PCA for two components") +image(pca, ylim = c(-1, maxy), col=c("black", "white")) + + +############################# III) properties over acquisition (spectra index)########## +############################################################################## + +par(mfrow = c(2,1), mar=c(5,6,4,2)) + +## 7a) number of peaks per spectrum - scatterplot +plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="7a) Number of peaks per spectrum") +title(xlab="Spectra index \n (= Acquisition time)", line=3) +title(ylab="Number of peaks", line=4) + +## 7b) number of peaks per spectrum - histogram +hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="") +title(main="7b) Number of peaks per spectrum", line=2) +title(ylab="Frequency = # spectra", line=4) +abline(v=median(peaksperpixel), col="blue") + +## 8a) TIC per spectrum - density scatterplot +zero=0 +par(mfrow = c(2,1), mar=c(5,6,4,2)) +plot_colorByDensity(pixels(msidata), TICs, ylab = "", xlab = "", main="8a) TIC per pixel") +title(xlab="Spectra index \n (= Acquisition time)", line=3) +title(ylab = "Total ion chromatogram intensity", line=4) + +## 8b) TIC per spectrum - histogram +hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="") +title(main= "8b) TIC per spectrum", line=2) +title(ylab="Frequency = # spectra", line=4) +abline(v=median(log(TICs[TICs>0])), col="blue") + + +## 9) intensity of chosen peptides over acquisition (pixel index) + +if (length(inputmasses) != 0) +{ + + par(mfrow = c(3, 2)) + intensityvector = vector() + for (mass in 1:length(inputmasses)) + { + mznumber = features(msidata, mz = inputmasses[mass]) + intensityvector = spectra(msidata)[][mznumber,] + plot(intensityvector, main=inputnames[mass], xlab="Spectra index \n (= Acquisition time)") + } +} else {print("The inputpeptide masses were outside the mass range")} + +################################## IV) changes over mz ############################ +################################################################################### + +## 10) Number of peaks per mz + +par(mfrow = c(2,1), mar=c(5,6,4,4.5)) +## 10a) Number of peaks per mz - scatterplot +plot_colorByDensity(mz(msidata),peakspermz, main= "10a) Number of peaks for each mz", ylab ="") +title(xlab="mz in Dalton", line=2.5) +title(ylab = "Number of peaks", line=4) +axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1) +mtext("Coverage of spectra [%]", 4, line=3, adj=1) + +# make plot smaller to fit axis and labels, add second y axis with % +## 10b) Number of peaks per mz - histogram +hist(peakspermz, main="", las=1, ylab="", xlab="") +title(ylab = "Frequency", line=4) +title(main="10b) Number of peaks per mz", xlab = "Number of peaks per mz", line=2) +abline(v=median(peakspermz), col="blue") + + +## 11) Sum of intensities per mz + +par(mfrow = c(2,1), mar=c(5,6,4,2)) +# 11a) sum of intensities per mz - scatterplot +plot_colorByDensity(mz(msidata),mzTIC, main= "11a) Sum of all peak intensities for each mz", ylab ="") +title(xlab="mz in Dalton", line=2.5) +title(ylab="Intensity sum", line=4) +# 11b) sum of intensities per mz - histogram +hist(log(mzTIC), main="", xlab = "", las=1, ylab="") +title(main="11b) Sum of intensities per mz", line=2, ylab="") +title(xlab = "log (sum of intensities per mz)") +title(ylab = "Frequency", line=4) +abline(v=median(log(mzTIC[mzTIC>0])), col="blue") + + + +################################## V) general plots ############################ +################################################################################### + + +## 12) Intensity distribution + +par(mfrow = c(2,1), mar=c(5,6,4,2)) + +## 12a) Intensity histogram: +hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1) +title(main="12a) Log2-transformed intensities", line=2) +title(xlab="log2 intensities") +title(ylab="Frequency", line=4) +abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)])), col="blue") + +## 12b) Median intensity over spectra +medianint_spectra = apply(spectra(msidata), 2, median) +plot(medianint_spectra, main="12b) Median intensity per spectrum",las=1, xlab="Spectra index \n (= Acquisition time)", ylab="") +title(ylab="Median spectrum intensity", line=4) + +## 13) Mass spectra + +par(mfrow = c(2, 2)) +plot(msidata, pixel = 1:length(pixelnumber), main= "Average spectrum") +plot(msidata, pixel =round(length(pixelnumber)/2, digits=0), main="Spectrum in middle of acquisition") +plot(msidata, pixel = highestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,]))) +plot(msidata, pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,]))) + +dev.off() + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + `_ +- Analyze7.5 (upload hdr, img and t2m file via the "composite" function) +- Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) + +Only for continuous imzML so far. + +The output of this tool contains key values and plots of the imaging data as pdf. + + ]]> + + + 10.1093/bioinformatics/btv146 + + diff -r 000000000000 -r 845073d506a8 test-data/Analyze75.hdr Binary file test-data/Analyze75.hdr has changed diff -r 000000000000 -r 845073d506a8 test-data/Analyze75.img Binary file test-data/Analyze75.img has changed diff -r 000000000000 -r 845073d506a8 test-data/Analyze75.t2m Binary file test-data/Analyze75.t2m has changed diff -r 000000000000 -r 845073d506a8 test-data/Example_Continuous.ibd Binary file test-data/Example_Continuous.ibd has changed diff -r 000000000000 -r 845073d506a8 test-data/Example_Continuous.imzML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Example_Continuous.imzML Tue Oct 31 06:00:03 2017 -0400 @@ -0,0 +1,373 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff -r 000000000000 -r 845073d506a8 test-data/Testfile_qualitycontrol_analyze75.pdf Binary file test-data/Testfile_qualitycontrol_analyze75.pdf has changed diff -r 000000000000 -r 845073d506a8 test-data/Testfile_qualitycontrol_imzml.pdf Binary file test-data/Testfile_qualitycontrol_imzml.pdf has changed diff -r 000000000000 -r 845073d506a8 test-data/Testfile_qualitycontrol_rdata.pdf Binary file test-data/Testfile_qualitycontrol_rdata.pdf has changed diff -r 000000000000 -r 845073d506a8 test-data/example_continousS042.RData Binary file test-data/example_continousS042.RData has changed diff -r 000000000000 -r 845073d506a8 test-data/inputpeptides.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputpeptides.csv Tue Oct 31 06:00:03 2017 -0400 @@ -0,0 +1,3 @@ +152 mass1 +328.9 mass2 +185.2 mass3 diff -r 000000000000 -r 845073d506a8 test-data/inputpeptides.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputpeptides.txt Tue Oct 31 06:00:03 2017 -0400 @@ -0,0 +1,3 @@ +854.5 mass1 +1296.7 mass2 +2000.8 mass3