Mercurial > repos > mmonsoor > probmetab
diff export.class.table-color-graph.R @ 0:e13ec2c3fabe draft
planemo upload commit 25fd6a739741295e3f434e0be0286dee61e06825
author | mmonsoor |
---|---|
date | Mon, 04 Jul 2016 04:29:25 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/export.class.table-color-graph.R Mon Jul 04 04:29:25 2016 -0400 @@ -0,0 +1,455 @@ +#' export.class.table +#' +#' Builds a matrix with the probability for all mass to candidate compounds +#' assignments, by averaging the number of assignments obtained by the gibbs sampler algorithm +#' or ordering the compound candidates with the likelihood matrix. +#' +#' @param gibbsL a list of attributions and probabilities from gibbs.samp function. +#' @param reactionM data.frame with compound annotation information. +#' @param molIon non redundant ion annotation. +#' @param probM optionally to gibbsL, a matrix of likelihoods. +#' @param html logical, indicating whether a html file should be generated. This parameter uses the raw data to plot EICs and may be time consuming. +#' @param filename html file name, the default is "test". +#' @param burnIn how many samples of the gibbs sampler should be discarded. +#' @param linkPattern which pattern should be linked to compound id, for now we have +#' implemented "kegg", "pubchem" and "chebi" patterns. +#' @param m.test statistical test to compare mean differences. This option +#' is only available to single acquisition mode analysis, with options +#' "t.test" and "anova". +#' @param class1 if the m.test is "t.test" first class to compare in the test, +#' according with xcmsSet phenoData. +#' @param class2 if the m.test is "t.test" second class to compare in the test, +#' according with xcmsSet phenoData. +#' @param norm logical, if TRUE performs median normalization from (Anal. Chem. 2011, 83, 5864-5872). +#' @param DB data.frame table used to search compounds, with the field name to be incorporated in the html table. +#' @param prob how to calculate the probability to attribute a mass to a compound. +#' Default is "count", which divide the number of times each identity was +#' was attributed by the number of samples. Optionally the user could +#' choose to use the mean of the probabilities of the identity, "mean". +#' @return A list with a matrix "classTable" with attributions and probabilities and +#' indexes of selected masses from xcms peak table. +#' +#' @export + +export.class.table <- function(gibbsL=NULL, reactionM, molIon=NULL, probM=NULL, html=FALSE, filename="test", burnIn=3000, linkPattern="kegg", m.test="none", class1=NULL, class2=NULL, norm=FALSE, DB, prob="count", colorplot=FALSE, addLink=NULL) { + + plotEIC <- function (xcmsObject, figidx, pngidx, colorplot, mode=NULL) { + dir.create(paste(filename,"_fig",sep="")) + gt<-groups(xcmsObject) + if(colorplot==TRUE){ + gt2 <- gt[figidx,] + rgt <- gt2[,c("rtmin","rtmax")] + rgt[,1] <- rgt[,1]-100 + rgt[,2] <- rgt[,2]+100 + #require(doMC) + #registerDoMC() + #system.time( + #foreach(i=1:nrow(gt2)) %dopar% { + for(i in 1:nrow(gt2)){ + groupidx1 <- which(gt[,"rtmed"] > rgt[i,1] & gt[,"rtmed"] < rgt[i,2] & gt[,"mzmed"]> gt2[i,"mzmed"]-0.1 & gt[,"mzmed"]< gt2[i,"mzmed"]+0.1) + eiccor <- getEIC(xcmsObject, groupidx = groupidx1) + png(paste(filename, "_fig/", sprintf("%003d", i), ".png", sep="")) + plot(eiccor, xcmsObject, groupidx = 1) + dev.off() + } + } else { + gt <- gt[figidx,] + rgt <- gt[,c("rtmin","rtmax")] + rgt[,1] <- rgt[,1]-100 + rgt[,2] <- rgt[,2]+100 + + eics <- getEIC(xcmsObject, mzrange=gt, rtrange =rgt, groupidx = 1:nrow(gt)) + png(file.path(paste(filename, "_fig/%003d.png", sep="")), height=768, width=1024) + #png(file.path(paste(filename, "_fig/", pngidx, sep="")), h=768, w=1024) + plot(eics, xcmsObject) + dev.off() + } + if(!is.null(mode)) { + pngs <- dir(paste(filename, "_fig/", sep="")) + if(length(grep("pos|neg" , pngs))) pngs <- pngs[-grep("pos|neg" , pngs)] + opng <- as.numeric(sub(".png","", pngs)) + pngs <- pngs[order(opng)] + name1 <- paste(filename, "_fig/",pngs, sep="") + name2 <- paste(filename, "_fig/",pngidx, mode, ".png", sep="") + for(i in 1:length(name1)) file.rename(name1[i], name2[i]) + } + + } + allion <- molIon$molIon[molIon$molIon[,"isotope"]==0,] + ReactMatrix <- reactionM[reactionM[,5]!="unknown",] + x <- apply(unique(ReactMatrix[,c(2, 3)]), 2, as.numeric) # Have to look for all pairs + y <- as.numeric(ReactMatrix[,4]) + prob_mean_ma <- matrix(0, nrow = length(y), ncol = nrow(x)) +# z_average <- matrix(0, nrow = length(y), ncol = length(x)) + + if (!is.null(gibbsL)){ + prob_table <- gibbsL$prob_table[,-c(1:burnIn)] + class_table <- gibbsL$class_table[,-c(1:burnIn)] + #indList <- tapply(1:nrow(ReactMatrix), as.numeric(ReactMatrix[,1]), function(x) x) + coords <- tapply(1:nrow(ReactMatrix), ReactMatrix[,"molIonID"], function(x) x) + coords2 <- unlist(lapply(coords, function(x) rep(x[1], length(x)))) + indList <- coords[order(unique(coords2))] + fillMatrix <- function(j,i) { + idp <- which(class_table[i,] == j) + if(prob=="count") prob_mean_ma[j,i] <<- length(idp)/ncol(class_table) + if(prob=="mean") prob_mean_ma[j,i] <<- mean(prob_table[i,idp]) + } + + + for ( i in 1:nrow(x) ) { + + sapply(indList[[i]], fillMatrix, i) + } + if(sum(prob_mean_ma=="NaN")) prob_mean_ma[prob_mean_ma=="NaN"] <- 0 +# for ( i in 1:nrow(x) ) { +# for ( j in 1:length(y) ) { +# idp <- which(class_table[i,] == j) +# prob_mean_ma[j,i] <- mean(prob_table[i,idp]) +# # this is an alternative way to calculate the probabilities, should try latter, and compare results +# #prob_mean_ma[j,i] <- length(idp)/ncol(class_table) +# if ( prob_mean_ma[j,i] == "NaN" ) prob_mean_ma[j,i] <- 0 +# } +# # do I still need this matrix? +# k=which(prob_mean_ma[,i]==max(prob_mean_ma[,i])) +# z_average[k[1],i]=1 +# } + } + else { + prob_mean_ma <- probM + } + # think about natural probabilities + # prob_mean_ma[prob_mean_ma[,1]!=0,1]/sum(prob_mean_ma[prob_mean_ma[,1]!=0,1]) + prob_mean_ma <- apply(prob_mean_ma, 2, function(x){ x[x!=0] <- x[x!=0]/sum(x[x!=0]); return(x)} ) + + # create a dir to figures + lpattern <- function(type){ + switch(type, + kegg = "http://www.genome.jp/dbget-bin/www_bget?", + chebi = "http://www.ebi.ac.uk/chebi/searchId.do;EFB7DFF9E88306BBCD6AB78B32664A85?chebiId=", + pubchem = "http://www.ncbi.nlm.nih.gov/pccompound/?term=" + ) + } + linkURL <- lpattern(linkPattern) + fig <- paste("file://", getwd(), paste("/",filename,"_fig/",sep=""), sep="") + if(!is.null(molIon$cameraobj)) { + figidx <- c("") + coords <- gsub("(^\\d)","X\\1",rownames(molIon$cameraobj@xcmsSet@phenoData)) + # experimental! Which set of characters???? + coords <- gsub("-|\\,|~","\\.",coords) + coords <- gsub("\\s+","\\.",coords) + peaklist <- getPeaklist(molIon$cameraobj) + rpeaklist <- peaklist[,c("mz","rt","isotopes","adduct","pcgroup")] + } + else { + figidx <- c("","") + coordsP <- gsub("(^\\d)","X\\1",rownames(molIon$pos@xcmsSet@phenoData)) + # experimental! Which set of characters???? + coordsP <- gsub("-|\\,|~","\\.",coordsP) + coordsP <- gsub("\\s+","\\.",coordsP) + coordsN <- gsub("(^\\d)","X\\1",rownames(molIon$neg@xcmsSet@phenoData)) + # experimental! Which set of characters???? + coordsN <- gsub("-|\\,|~","\\.",coordsN) + coordsN <- gsub("\\s+","\\.",coordsN) + coords <- coordsP + if(length(coordsP)!=length(coordsN)) cat("\n Warning: The number of samples are different\n") + + peaklistP <- getPeaklist(molIon$pos) + rpeaklistP <- peaklistP[,c("mz","rt","isotopes","adduct","pcgroup")] + peaklistN <- getPeaklist(molIon$neg) + rpeaklistN <- peaklistN[,c("mz","rt","isotopes","adduct","pcgroup")] + } + +# if(sum(is.na(peaklist))) { +# cat("\nWarning: NAs Found in peaklist\n\nSubstituting for \"ones\"\n") +# na.ids <- which(is.na(peaklist),arr.ind=TRUE) +# for(l in 1:nrow(na.ids)){ +# peaklist[na.ids[l,][1], na.ids[l,][2]] <- 1 +# } +# } +# + + ans <- matrix("", nrow=1, ncol=7+length(coords)) + unq <- unique(ReactMatrix[,2:3]) + for (i in 1:nrow(unq)) { + coord <- which(ReactMatrix[,2]==unq[i,1] & ReactMatrix[,3]==unq[i,2]) + coord2 <- which(allion[,2]==unq[i,1] & allion[,1]==unq[i,2]) + # idx2 <- unique(which(allion[,1] %in% reactionM[reactionM[,5]=="unknown",2])) + # work with the higher intensities for a given ion annotation, not necessarily the right one + + if(!is.null(molIon$cameraobj)) { + idx <- as.vector(unlist(sapply(allion[coord2,"trace"], + function(x) { + x <- as.matrix(x) + raw <- strsplit(x,";")[[1]] + mraw <- apply(peaklist[raw, coords], 1, mean) + raw[which.max(mraw)] + } + + ) + ) + ) + + idx <- unique(idx) + figidx <- append(figidx,idx) + } + else { + idx <- c() + + for(l in 1:nrow( allion[coord2,c("trace","comb")])) { + x <- as.matrix(allion[coord2,c("trace","comb")][l,]) + raw <- strsplit(x[1],";")[[1]] + if(x[2]!="neg"){ + mraw <- apply(peaklistP[raw, coordsP], 1, mean, na.rm=TRUE) + } + else { + + mraw <- apply(peaklistN[raw, coordsN], 1, mean, na.rm=TRUE) + } + idx <- c(idx, raw[which.max(mraw)]) + } + + + idx <- unique(idx) + figidx <- rbind(figidx,c(idx,allion[coord2,"comb"][1])) + } + #figidx <- append(figidx,strsplit(allion[coord2,5], ";")[[1]][1]) + ans1 <- matrix("", nrow=length(coord), ncol=7+length(coords)) + ans1[,2]<-as.matrix(ReactMatrix[coord,5]) + prob <- as.matrix(prob_mean_ma[coord, i]) # need to change and compare a pair of mass/rt + # number figs + if ( i >= 100 ) { ans1[1,6]=i } + else { if ( i >= 10 ) { ans1[1,6]=paste(0,i, sep="") } else { ans1[1,6]=paste("00",i, sep="") } } + + if (sum(prob)>0) { + #prob <- prob/sum(prob) + o <- order(prob, decreasing=TRUE) + ans1[,-6] <- ans1[o,-6] + ans1 <- matrix(ans1, nrow=length(o)) + ans1[1,1] <- ReactMatrix[coord[1],3] + #ans1[,3] <- round(prob/min(prob[prob!=0]), 3)[o] + ans1[,3] <- round(prob, 3)[o] + if (length(prob[prob!=0])>1) { + entropy <- -sum(prob[prob!=0]*log(prob[prob!=0], length(prob[prob!=0]))) + } + else { entropy <- 0 + } + ans1[1,4] <- round(entropy, 3) + } + else { + ans1[1,1] <- ReactMatrix[coord[1],3] + ans1[1,3] <- "undef" + } + + if(!is.null(molIon$cameraobj)) { + ans1[1,7] <- apply(rpeaklist[idx,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#")) + ans1[1,8:ncol(ans1)] <- as.matrix(peaklist[idx, coords]) + } + else { + if(allion[coord2,"comb"]=="pos"|allion[coord2,"comb"]=="both") { + ans1[1,7] <- apply(rpeaklistP[idx,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#")) + ans1[1,8:ncol(ans1)] <- as.matrix(peaklistP[idx, coordsP]) + } + else { + ans1[1,7] <- apply(rpeaklistN[idx,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#")) + ans1[1,8:ncol(ans1)] <- as.matrix(peaklistN[idx, coordsN]) + } + } + ans <- rbind(ans, as.matrix(ans1)) + } + ans <- ans[-1,] + # this option should change according with the bank + if(html) { + nid <- unlist(sapply(ans[,2], function(x) which(DB$id==x))) + #ans[,2] <- as.character(DB$name[nid]) + } + unk <- reactionM[reactionM[,5]=="unknown",] + ans1 <- matrix("", nrow=nrow(unk), ncol=7+length(coords)) + ans1[,1] <- unk[,3] + ans1[,2] <- unk[,5] + for(j in 1:nrow(ans1)) { + i <- j + max(as.numeric(ans[,6]),na.rm=TRUE) + if ( i >= 100 ) { ans1[j,6]=i } + else { if ( i >= 10 ) { ans1[j,6]=paste(0,i, sep="") } else { ans1[j,6]=paste("00",i, sep="") } } + } + # this step try to recover ids of ion annotation for masses without database annotation + idx2 <- c(); #for(m in 1:nrow(allion)) if(sum(allion[m,2]==as.numeric(unk[,2])) & sum(allion[m,1]==as.numeric(unk[,3]))) idx2 <- append(idx2, m) + # temp changes made 03/03/2014 have to check carefuly + lidx <- lapply(1:nrow(allion), function(m) which(allion[m,2]==unk[,2] & allion[m,1]==unk[,3])) + idx2 <- which(lapply(lidx, length)>0) + + if(!is.null(molIon$cameraobj)) { + idx <- as.vector(unlist(sapply(allion[idx2,"trace"], + function(x) { + x <- as.matrix(x) + raw <- strsplit(x,";")[[1]] + mraw <- apply(peaklist[raw, coords], 1, mean) + raw[which.max(mraw)] + } + + ) + ) + ) + } + + else { + # don't know what happened here with apply + idx <- c() + + for(i in 1:nrow( allion[idx2,c("trace","comb")])) { + x <- as.matrix(allion[idx2,c("trace","comb")][i,]) + raw <- strsplit(x[1],";")[[1]] + if(x[2]!="neg"){ + mraw <- apply(peaklistP[raw, coordsP], 1, mean, na.rm=TRUE) + } + else { + + mraw <- apply(peaklistN[raw, coordsN], 1, mean, na.rm=TRUE) + } + idx <- c(idx, raw[which.max(mraw)]) + } + + + + tmpidx <- cbind(idx,allion[idx2,"comb"]) + } + if(!is.null(molIon$cameraobj)) { + ans1[,7] <- apply(rpeaklist[idx,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#")) + ans1[,8:ncol(ans1)] <- as.matrix(peaklist[idx, coords]) + } + else { + idxP <- tmpidx[tmpidx[,2]!="neg",1] + ans1[1:length(idxP),7] <- apply(rpeaklistP[idxP,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#")) + ans1[1:length(idxP),8:ncol(ans1)] <- as.matrix(peaklistP[idxP, coordsP]) + idxN <- tmpidx[tmpidx[,2]=="neg",1] + ans1[(length(idxP)+1):nrow(ans1),7] <- apply(rpeaklistN[idxN,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#")) + ans1[(length(idxP)+1):nrow(ans1),8:ncol(ans1)] <- as.matrix(peaklistN[idxN, coordsN]) + } + ans <- rbind(ans, as.matrix(ans1)) + + if(!is.null(molIon$cameraobj)) { + figidx <- c(figidx,idx) + figidx <- as.numeric(figidx[-1]) + } + else { + figidx <- rbind(figidx,tmpidx) + allidx <- figidx[-1,] + allidx <- cbind(allidx, ans[ans[,6]!="",6]) + figidx <- as.numeric(figidx[-1,1]) + } + + + if(m.test=="none") { + testname <- "none" + #testname <- "Formula" + #ans[ans[,2]!="unknown",][,5] <- as.character(DB$formula[nid]) + } + if(m.test=="t.test") { + normalize.medFC <- function(mat) { + # Perform median fold change normalisation + # X - data set [Variables & Samples] + medSam <- apply(mat, 1, median) + medSam[which(medSam==0)] <- 0.0001 + mat <- apply(mat, 2, function(mat, medSam){ + medFDiSmpl <- mat/medSam + vec<-mat/median(medFDiSmpl) + return(vec) + }, medSam) + return (mat) + } + # this piece of code was copied from xcms + pval <- function(X, classlabel, teststat) { + + n1 <- rowSums(!is.na(X[,classlabel == 0])) + n2 <- rowSums(!is.na(X[,classlabel == 1])) + A <- apply(X[,classlabel == 0], 1, sd, na.rm=TRUE)^2/n1 ## sd(t(X[,classlabel == 0]), na.rm = TRUE)^2/n1 + B <- apply(X[,classlabel == 1], 1, sd, na.rm=TRUE)^2/n2 ## sd(t(X[,classlabel == 1]), na.rm = TRUE)^2/n2 + df <- (A+B)^2/(A^2/(n1-1)+B^2/(n2-1)) + + pvalue <- 2 * (1 - pt(abs(teststat), df)) + invisible(pvalue) + } + + c1 <- grep(class1, molIon$cameraobj@xcmsSet@phenoData[,1]) + c2 <- grep(class2, molIon$cameraobj@xcmsSet@phenoData[,1]) + testclab <- c(rep(0, length(c1)), rep(1, length(c2))) + testval <- groupval(molIon$cameraobj@xcmsSet, "medret", "into") + if(norm) testval <- normalize.medFC(testval) + tstat <- mt.teststat(testval, testclab) + pvalue <- pval(testval, testclab, tstat) + +# +# rport <- diffreport(molIon$cameraobj@xcmsSet, class1=class1, class2= class2, sortpval=FALSE) +# ans[ans[,6]!="",5] <- rport[figidx, "pvalue"] + ans[ans[,6]!="",5] <- pvalue[figidx] + testname <- "t.test p-value" + } + if(m.test=="anova"){ + class <- molIon$cameraobj@xcmsSet@phenoData + getPvalue <- function(dataidx) { + aov.data <- data.frame(resp=as.numeric(peaklist[dataidx,coords]), class=class) + anova(aov(resp~class, aov.data))$Pr[1] + } + testname <- "anova p-value" + ans[ans[,6]!="",5] <- sapply(figidx, getPvalue) + } + + header <- matrix(c("Proposed Mass","Most probable Compound","Probability","Entropy", testname,"EIC-plot", "Ion annotation",coords), nrow=1 , ncol=7+length(coords) ) + ans <- rbind(header, ans) + + + # additional field + # ans <- cbind(ans[,1:2], ans[,2], ans[,3:ncol(ans)]) + #ans[ans[,3]!="unknown",][-1,3] <- as.character(DB$sbml.id[nid]) + + if(html) { + #require(hwriter) + ansb <- ans + ans[ans[,2]!="unknown",][-1,2] <- as.character(DB$name[nid]) + if(linkPattern=="pubchem") ansb <- ans + + hyper=matrix(paste(linkURL, ansb[-1,2], sep=""),ncol=1 ) + if(!is.null(molIon$cameraobj)) { + hyper1=matrix(paste(fig, ans[-1,6],".png", sep=""),ncol=1 ) + } + else { + hyper1 <- ans[-1,6] + hyper1[ans[-1,6]!=""][allidx[,2]!="neg"] <- paste(hyper1[ans[-1,6]!=""][allidx[,2]!="neg"], "pos", sep="") + hyper1[ans[-1,6]!=""][allidx[,2]=="neg"] <- paste(hyper1[ans[-1,6]!=""][allidx[,2]=="neg"], "neg", sep="") + hyper1=matrix(paste(fig, hyper1,".png", sep=""),ncol=1 ) + } + p=openPage(paste(filename,".html",sep="")) + ans2 <- ans[,1:7] + link <- cbind(matrix(NA,nrow(ans2),1),rbind(NA,hyper),matrix(NA,nrow(ans2),3),rbind(NA,hyper1),matrix(NA,nrow(ans2),1)) + # This block is responsible to add as many columns as the user + # wants + if(!is.null(addLink)){ + for(l in 1:length(addLink$link)) { + link <- cbind(link, rbind(NA, addLink[[l]])) + } + for(a in 1:length(addLink$ans)) { + ans2 <- cbind(ans2,addLink$ans[[a]]) + #colnames(ans2)[7+a] <- addLink$ans[[a]][1] + } + } + hwrite(ans2, p,row.bgcolor='#ffdc98', link=link ) + closePage(p) + if(!is.null(molIon$cameraobj)) { + plotEIC(molIon$cameraobj@xcmsSet, figidx, ans[ans[,6]!="",6][-1], colorplot=colorplot) + } + else { + dataidxP <- as.numeric(allidx[allidx[,2]!="neg",1]) + pngidxP <- allidx[allidx[,2]!="neg",3] + plotEIC(molIon$pos@xcmsSet, dataidxP, pngidxP, "pos", colorplot=colorplot) + dataidxN <- as.numeric(allidx[allidx[,2]=="neg",1]) + pngidxN <- allidx[allidx[,2]=="neg",3] + plotEIC(molIon$neg@xcmsSet, dataidxN, pngidxN, "neg", colorplot=colorplot) + } + + } + else { + ansb <- ans + } + colnames(ansb) <- ansb[1,] + ansb <- ansb[-1,] + return(list(classTable=ansb, figidx=figidx)) +}