comparison export.class.table-color-graph.R @ 0:e13ec2c3fabe draft

planemo upload commit 25fd6a739741295e3f434e0be0286dee61e06825
author mmonsoor
date Mon, 04 Jul 2016 04:29:25 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e13ec2c3fabe
1 #' export.class.table
2 #'
3 #' Builds a matrix with the probability for all mass to candidate compounds
4 #' assignments, by averaging the number of assignments obtained by the gibbs sampler algorithm
5 #' or ordering the compound candidates with the likelihood matrix.
6 #'
7 #' @param gibbsL a list of attributions and probabilities from gibbs.samp function.
8 #' @param reactionM data.frame with compound annotation information.
9 #' @param molIon non redundant ion annotation.
10 #' @param probM optionally to gibbsL, a matrix of likelihoods.
11 #' @param html logical, indicating whether a html file should be generated. This parameter uses the raw data to plot EICs and may be time consuming.
12 #' @param filename html file name, the default is "test".
13 #' @param burnIn how many samples of the gibbs sampler should be discarded.
14 #' @param linkPattern which pattern should be linked to compound id, for now we have
15 #' implemented "kegg", "pubchem" and "chebi" patterns.
16 #' @param m.test statistical test to compare mean differences. This option
17 #' is only available to single acquisition mode analysis, with options
18 #' "t.test" and "anova".
19 #' @param class1 if the m.test is "t.test" first class to compare in the test,
20 #' according with xcmsSet phenoData.
21 #' @param class2 if the m.test is "t.test" second class to compare in the test,
22 #' according with xcmsSet phenoData.
23 #' @param norm logical, if TRUE performs median normalization from (Anal. Chem. 2011, 83, 5864-5872).
24 #' @param DB data.frame table used to search compounds, with the field name to be incorporated in the html table.
25 #' @param prob how to calculate the probability to attribute a mass to a compound.
26 #' Default is "count", which divide the number of times each identity was
27 #' was attributed by the number of samples. Optionally the user could
28 #' choose to use the mean of the probabilities of the identity, "mean".
29 #' @return A list with a matrix "classTable" with attributions and probabilities and
30 #' indexes of selected masses from xcms peak table.
31 #'
32 #' @export
33
34 export.class.table <- function(gibbsL=NULL, reactionM, molIon=NULL, probM=NULL, html=FALSE, filename="test", burnIn=3000, linkPattern="kegg", m.test="none", class1=NULL, class2=NULL, norm=FALSE, DB, prob="count", colorplot=FALSE, addLink=NULL) {
35
36 plotEIC <- function (xcmsObject, figidx, pngidx, colorplot, mode=NULL) {
37 dir.create(paste(filename,"_fig",sep=""))
38 gt<-groups(xcmsObject)
39 if(colorplot==TRUE){
40 gt2 <- gt[figidx,]
41 rgt <- gt2[,c("rtmin","rtmax")]
42 rgt[,1] <- rgt[,1]-100
43 rgt[,2] <- rgt[,2]+100
44 #require(doMC)
45 #registerDoMC()
46 #system.time(
47 #foreach(i=1:nrow(gt2)) %dopar% {
48 for(i in 1:nrow(gt2)){
49 groupidx1 <- which(gt[,"rtmed"] > rgt[i,1] & gt[,"rtmed"] < rgt[i,2] & gt[,"mzmed"]> gt2[i,"mzmed"]-0.1 & gt[,"mzmed"]< gt2[i,"mzmed"]+0.1)
50 eiccor <- getEIC(xcmsObject, groupidx = groupidx1)
51 png(paste(filename, "_fig/", sprintf("%003d", i), ".png", sep=""))
52 plot(eiccor, xcmsObject, groupidx = 1)
53 dev.off()
54 }
55 } else {
56 gt <- gt[figidx,]
57 rgt <- gt[,c("rtmin","rtmax")]
58 rgt[,1] <- rgt[,1]-100
59 rgt[,2] <- rgt[,2]+100
60
61 eics <- getEIC(xcmsObject, mzrange=gt, rtrange =rgt, groupidx = 1:nrow(gt))
62 png(file.path(paste(filename, "_fig/%003d.png", sep="")), height=768, width=1024)
63 #png(file.path(paste(filename, "_fig/", pngidx, sep="")), h=768, w=1024)
64 plot(eics, xcmsObject)
65 dev.off()
66 }
67 if(!is.null(mode)) {
68 pngs <- dir(paste(filename, "_fig/", sep=""))
69 if(length(grep("pos|neg" , pngs))) pngs <- pngs[-grep("pos|neg" , pngs)]
70 opng <- as.numeric(sub(".png","", pngs))
71 pngs <- pngs[order(opng)]
72 name1 <- paste(filename, "_fig/",pngs, sep="")
73 name2 <- paste(filename, "_fig/",pngidx, mode, ".png", sep="")
74 for(i in 1:length(name1)) file.rename(name1[i], name2[i])
75 }
76
77 }
78 allion <- molIon$molIon[molIon$molIon[,"isotope"]==0,]
79 ReactMatrix <- reactionM[reactionM[,5]!="unknown",]
80 x <- apply(unique(ReactMatrix[,c(2, 3)]), 2, as.numeric) # Have to look for all pairs
81 y <- as.numeric(ReactMatrix[,4])
82 prob_mean_ma <- matrix(0, nrow = length(y), ncol = nrow(x))
83 # z_average <- matrix(0, nrow = length(y), ncol = length(x))
84
85 if (!is.null(gibbsL)){
86 prob_table <- gibbsL$prob_table[,-c(1:burnIn)]
87 class_table <- gibbsL$class_table[,-c(1:burnIn)]
88 #indList <- tapply(1:nrow(ReactMatrix), as.numeric(ReactMatrix[,1]), function(x) x)
89 coords <- tapply(1:nrow(ReactMatrix), ReactMatrix[,"molIonID"], function(x) x)
90 coords2 <- unlist(lapply(coords, function(x) rep(x[1], length(x))))
91 indList <- coords[order(unique(coords2))]
92 fillMatrix <- function(j,i) {
93 idp <- which(class_table[i,] == j)
94 if(prob=="count") prob_mean_ma[j,i] <<- length(idp)/ncol(class_table)
95 if(prob=="mean") prob_mean_ma[j,i] <<- mean(prob_table[i,idp])
96 }
97
98
99 for ( i in 1:nrow(x) ) {
100
101 sapply(indList[[i]], fillMatrix, i)
102 }
103 if(sum(prob_mean_ma=="NaN")) prob_mean_ma[prob_mean_ma=="NaN"] <- 0
104 # for ( i in 1:nrow(x) ) {
105 # for ( j in 1:length(y) ) {
106 # idp <- which(class_table[i,] == j)
107 # prob_mean_ma[j,i] <- mean(prob_table[i,idp])
108 # # this is an alternative way to calculate the probabilities, should try latter, and compare results
109 # #prob_mean_ma[j,i] <- length(idp)/ncol(class_table)
110 # if ( prob_mean_ma[j,i] == "NaN" ) prob_mean_ma[j,i] <- 0
111 # }
112 # # do I still need this matrix?
113 # k=which(prob_mean_ma[,i]==max(prob_mean_ma[,i]))
114 # z_average[k[1],i]=1
115 # }
116 }
117 else {
118 prob_mean_ma <- probM
119 }
120 # think about natural probabilities
121 # prob_mean_ma[prob_mean_ma[,1]!=0,1]/sum(prob_mean_ma[prob_mean_ma[,1]!=0,1])
122 prob_mean_ma <- apply(prob_mean_ma, 2, function(x){ x[x!=0] <- x[x!=0]/sum(x[x!=0]); return(x)} )
123
124 # create a dir to figures
125 lpattern <- function(type){
126 switch(type,
127 kegg = "http://www.genome.jp/dbget-bin/www_bget?",
128 chebi = "http://www.ebi.ac.uk/chebi/searchId.do;EFB7DFF9E88306BBCD6AB78B32664A85?chebiId=",
129 pubchem = "http://www.ncbi.nlm.nih.gov/pccompound/?term="
130 )
131 }
132 linkURL <- lpattern(linkPattern)
133 fig <- paste("file://", getwd(), paste("/",filename,"_fig/",sep=""), sep="")
134 if(!is.null(molIon$cameraobj)) {
135 figidx <- c("")
136 coords <- gsub("(^\\d)","X\\1",rownames(molIon$cameraobj@xcmsSet@phenoData))
137 # experimental! Which set of characters????
138 coords <- gsub("-|\\,|~","\\.",coords)
139 coords <- gsub("\\s+","\\.",coords)
140 peaklist <- getPeaklist(molIon$cameraobj)
141 rpeaklist <- peaklist[,c("mz","rt","isotopes","adduct","pcgroup")]
142 }
143 else {
144 figidx <- c("","")
145 coordsP <- gsub("(^\\d)","X\\1",rownames(molIon$pos@xcmsSet@phenoData))
146 # experimental! Which set of characters????
147 coordsP <- gsub("-|\\,|~","\\.",coordsP)
148 coordsP <- gsub("\\s+","\\.",coordsP)
149 coordsN <- gsub("(^\\d)","X\\1",rownames(molIon$neg@xcmsSet@phenoData))
150 # experimental! Which set of characters????
151 coordsN <- gsub("-|\\,|~","\\.",coordsN)
152 coordsN <- gsub("\\s+","\\.",coordsN)
153 coords <- coordsP
154 if(length(coordsP)!=length(coordsN)) cat("\n Warning: The number of samples are different\n")
155
156 peaklistP <- getPeaklist(molIon$pos)
157 rpeaklistP <- peaklistP[,c("mz","rt","isotopes","adduct","pcgroup")]
158 peaklistN <- getPeaklist(molIon$neg)
159 rpeaklistN <- peaklistN[,c("mz","rt","isotopes","adduct","pcgroup")]
160 }
161
162 # if(sum(is.na(peaklist))) {
163 # cat("\nWarning: NAs Found in peaklist\n\nSubstituting for \"ones\"\n")
164 # na.ids <- which(is.na(peaklist),arr.ind=TRUE)
165 # for(l in 1:nrow(na.ids)){
166 # peaklist[na.ids[l,][1], na.ids[l,][2]] <- 1
167 # }
168 # }
169 #
170
171 ans <- matrix("", nrow=1, ncol=7+length(coords))
172 unq <- unique(ReactMatrix[,2:3])
173 for (i in 1:nrow(unq)) {
174 coord <- which(ReactMatrix[,2]==unq[i,1] & ReactMatrix[,3]==unq[i,2])
175 coord2 <- which(allion[,2]==unq[i,1] & allion[,1]==unq[i,2])
176 # idx2 <- unique(which(allion[,1] %in% reactionM[reactionM[,5]=="unknown",2]))
177 # work with the higher intensities for a given ion annotation, not necessarily the right one
178
179 if(!is.null(molIon$cameraobj)) {
180 idx <- as.vector(unlist(sapply(allion[coord2,"trace"],
181 function(x) {
182 x <- as.matrix(x)
183 raw <- strsplit(x,";")[[1]]
184 mraw <- apply(peaklist[raw, coords], 1, mean)
185 raw[which.max(mraw)]
186 }
187
188 )
189 )
190 )
191
192 idx <- unique(idx)
193 figidx <- append(figidx,idx)
194 }
195 else {
196 idx <- c()
197
198 for(l in 1:nrow( allion[coord2,c("trace","comb")])) {
199 x <- as.matrix(allion[coord2,c("trace","comb")][l,])
200 raw <- strsplit(x[1],";")[[1]]
201 if(x[2]!="neg"){
202 mraw <- apply(peaklistP[raw, coordsP], 1, mean, na.rm=TRUE)
203 }
204 else {
205
206 mraw <- apply(peaklistN[raw, coordsN], 1, mean, na.rm=TRUE)
207 }
208 idx <- c(idx, raw[which.max(mraw)])
209 }
210
211
212 idx <- unique(idx)
213 figidx <- rbind(figidx,c(idx,allion[coord2,"comb"][1]))
214 }
215 #figidx <- append(figidx,strsplit(allion[coord2,5], ";")[[1]][1])
216 ans1 <- matrix("", nrow=length(coord), ncol=7+length(coords))
217 ans1[,2]<-as.matrix(ReactMatrix[coord,5])
218 prob <- as.matrix(prob_mean_ma[coord, i]) # need to change and compare a pair of mass/rt
219 # number figs
220 if ( i >= 100 ) { ans1[1,6]=i }
221 else { if ( i >= 10 ) { ans1[1,6]=paste(0,i, sep="") } else { ans1[1,6]=paste("00",i, sep="") } }
222
223 if (sum(prob)>0) {
224 #prob <- prob/sum(prob)
225 o <- order(prob, decreasing=TRUE)
226 ans1[,-6] <- ans1[o,-6]
227 ans1 <- matrix(ans1, nrow=length(o))
228 ans1[1,1] <- ReactMatrix[coord[1],3]
229 #ans1[,3] <- round(prob/min(prob[prob!=0]), 3)[o]
230 ans1[,3] <- round(prob, 3)[o]
231 if (length(prob[prob!=0])>1) {
232 entropy <- -sum(prob[prob!=0]*log(prob[prob!=0], length(prob[prob!=0])))
233 }
234 else { entropy <- 0
235 }
236 ans1[1,4] <- round(entropy, 3)
237 }
238 else {
239 ans1[1,1] <- ReactMatrix[coord[1],3]
240 ans1[1,3] <- "undef"
241 }
242
243 if(!is.null(molIon$cameraobj)) {
244 ans1[1,7] <- apply(rpeaklist[idx,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#"))
245 ans1[1,8:ncol(ans1)] <- as.matrix(peaklist[idx, coords])
246 }
247 else {
248 if(allion[coord2,"comb"]=="pos"|allion[coord2,"comb"]=="both") {
249 ans1[1,7] <- apply(rpeaklistP[idx,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#"))
250 ans1[1,8:ncol(ans1)] <- as.matrix(peaklistP[idx, coordsP])
251 }
252 else {
253 ans1[1,7] <- apply(rpeaklistN[idx,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#"))
254 ans1[1,8:ncol(ans1)] <- as.matrix(peaklistN[idx, coordsN])
255 }
256 }
257 ans <- rbind(ans, as.matrix(ans1))
258 }
259 ans <- ans[-1,]
260 # this option should change according with the bank
261 if(html) {
262 nid <- unlist(sapply(ans[,2], function(x) which(DB$id==x)))
263 #ans[,2] <- as.character(DB$name[nid])
264 }
265 unk <- reactionM[reactionM[,5]=="unknown",]
266 ans1 <- matrix("", nrow=nrow(unk), ncol=7+length(coords))
267 ans1[,1] <- unk[,3]
268 ans1[,2] <- unk[,5]
269 for(j in 1:nrow(ans1)) {
270 i <- j + max(as.numeric(ans[,6]),na.rm=TRUE)
271 if ( i >= 100 ) { ans1[j,6]=i }
272 else { if ( i >= 10 ) { ans1[j,6]=paste(0,i, sep="") } else { ans1[j,6]=paste("00",i, sep="") } }
273 }
274 # this step try to recover ids of ion annotation for masses without database annotation
275 idx2 <- c(); #for(m in 1:nrow(allion)) if(sum(allion[m,2]==as.numeric(unk[,2])) & sum(allion[m,1]==as.numeric(unk[,3]))) idx2 <- append(idx2, m)
276 # temp changes made 03/03/2014 have to check carefuly
277 lidx <- lapply(1:nrow(allion), function(m) which(allion[m,2]==unk[,2] & allion[m,1]==unk[,3]))
278 idx2 <- which(lapply(lidx, length)>0)
279
280 if(!is.null(molIon$cameraobj)) {
281 idx <- as.vector(unlist(sapply(allion[idx2,"trace"],
282 function(x) {
283 x <- as.matrix(x)
284 raw <- strsplit(x,";")[[1]]
285 mraw <- apply(peaklist[raw, coords], 1, mean)
286 raw[which.max(mraw)]
287 }
288
289 )
290 )
291 )
292 }
293
294 else {
295 # don't know what happened here with apply
296 idx <- c()
297
298 for(i in 1:nrow( allion[idx2,c("trace","comb")])) {
299 x <- as.matrix(allion[idx2,c("trace","comb")][i,])
300 raw <- strsplit(x[1],";")[[1]]
301 if(x[2]!="neg"){
302 mraw <- apply(peaklistP[raw, coordsP], 1, mean, na.rm=TRUE)
303 }
304 else {
305
306 mraw <- apply(peaklistN[raw, coordsN], 1, mean, na.rm=TRUE)
307 }
308 idx <- c(idx, raw[which.max(mraw)])
309 }
310
311
312
313 tmpidx <- cbind(idx,allion[idx2,"comb"])
314 }
315 if(!is.null(molIon$cameraobj)) {
316 ans1[,7] <- apply(rpeaklist[idx,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#"))
317 ans1[,8:ncol(ans1)] <- as.matrix(peaklist[idx, coords])
318 }
319 else {
320 idxP <- tmpidx[tmpidx[,2]!="neg",1]
321 ans1[1:length(idxP),7] <- apply(rpeaklistP[idxP,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#"))
322 ans1[1:length(idxP),8:ncol(ans1)] <- as.matrix(peaklistP[idxP, coordsP])
323 idxN <- tmpidx[tmpidx[,2]=="neg",1]
324 ans1[(length(idxP)+1):nrow(ans1),7] <- apply(rpeaklistN[idxN,], 1, function(x) paste(x[c(1,2,3,4)], collapse="#"))
325 ans1[(length(idxP)+1):nrow(ans1),8:ncol(ans1)] <- as.matrix(peaklistN[idxN, coordsN])
326 }
327 ans <- rbind(ans, as.matrix(ans1))
328
329 if(!is.null(molIon$cameraobj)) {
330 figidx <- c(figidx,idx)
331 figidx <- as.numeric(figidx[-1])
332 }
333 else {
334 figidx <- rbind(figidx,tmpidx)
335 allidx <- figidx[-1,]
336 allidx <- cbind(allidx, ans[ans[,6]!="",6])
337 figidx <- as.numeric(figidx[-1,1])
338 }
339
340
341 if(m.test=="none") {
342 testname <- "none"
343 #testname <- "Formula"
344 #ans[ans[,2]!="unknown",][,5] <- as.character(DB$formula[nid])
345 }
346 if(m.test=="t.test") {
347 normalize.medFC <- function(mat) {
348 # Perform median fold change normalisation
349 # X - data set [Variables & Samples]
350 medSam <- apply(mat, 1, median)
351 medSam[which(medSam==0)] <- 0.0001
352 mat <- apply(mat, 2, function(mat, medSam){
353 medFDiSmpl <- mat/medSam
354 vec<-mat/median(medFDiSmpl)
355 return(vec)
356 }, medSam)
357 return (mat)
358 }
359 # this piece of code was copied from xcms
360 pval <- function(X, classlabel, teststat) {
361
362 n1 <- rowSums(!is.na(X[,classlabel == 0]))
363 n2 <- rowSums(!is.na(X[,classlabel == 1]))
364 A <- apply(X[,classlabel == 0], 1, sd, na.rm=TRUE)^2/n1 ## sd(t(X[,classlabel == 0]), na.rm = TRUE)^2/n1
365 B <- apply(X[,classlabel == 1], 1, sd, na.rm=TRUE)^2/n2 ## sd(t(X[,classlabel == 1]), na.rm = TRUE)^2/n2
366 df <- (A+B)^2/(A^2/(n1-1)+B^2/(n2-1))
367
368 pvalue <- 2 * (1 - pt(abs(teststat), df))
369 invisible(pvalue)
370 }
371
372 c1 <- grep(class1, molIon$cameraobj@xcmsSet@phenoData[,1])
373 c2 <- grep(class2, molIon$cameraobj@xcmsSet@phenoData[,1])
374 testclab <- c(rep(0, length(c1)), rep(1, length(c2)))
375 testval <- groupval(molIon$cameraobj@xcmsSet, "medret", "into")
376 if(norm) testval <- normalize.medFC(testval)
377 tstat <- mt.teststat(testval, testclab)
378 pvalue <- pval(testval, testclab, tstat)
379
380 #
381 # rport <- diffreport(molIon$cameraobj@xcmsSet, class1=class1, class2= class2, sortpval=FALSE)
382 # ans[ans[,6]!="",5] <- rport[figidx, "pvalue"]
383 ans[ans[,6]!="",5] <- pvalue[figidx]
384 testname <- "t.test p-value"
385 }
386 if(m.test=="anova"){
387 class <- molIon$cameraobj@xcmsSet@phenoData
388 getPvalue <- function(dataidx) {
389 aov.data <- data.frame(resp=as.numeric(peaklist[dataidx,coords]), class=class)
390 anova(aov(resp~class, aov.data))$Pr[1]
391 }
392 testname <- "anova p-value"
393 ans[ans[,6]!="",5] <- sapply(figidx, getPvalue)
394 }
395
396 header <- matrix(c("Proposed Mass","Most probable Compound","Probability","Entropy", testname,"EIC-plot", "Ion annotation",coords), nrow=1 , ncol=7+length(coords) )
397 ans <- rbind(header, ans)
398
399
400 # additional field
401 # ans <- cbind(ans[,1:2], ans[,2], ans[,3:ncol(ans)])
402 #ans[ans[,3]!="unknown",][-1,3] <- as.character(DB$sbml.id[nid])
403
404 if(html) {
405 #require(hwriter)
406 ansb <- ans
407 ans[ans[,2]!="unknown",][-1,2] <- as.character(DB$name[nid])
408 if(linkPattern=="pubchem") ansb <- ans
409
410 hyper=matrix(paste(linkURL, ansb[-1,2], sep=""),ncol=1 )
411 if(!is.null(molIon$cameraobj)) {
412 hyper1=matrix(paste(fig, ans[-1,6],".png", sep=""),ncol=1 )
413 }
414 else {
415 hyper1 <- ans[-1,6]
416 hyper1[ans[-1,6]!=""][allidx[,2]!="neg"] <- paste(hyper1[ans[-1,6]!=""][allidx[,2]!="neg"], "pos", sep="")
417 hyper1[ans[-1,6]!=""][allidx[,2]=="neg"] <- paste(hyper1[ans[-1,6]!=""][allidx[,2]=="neg"], "neg", sep="")
418 hyper1=matrix(paste(fig, hyper1,".png", sep=""),ncol=1 )
419 }
420 p=openPage(paste(filename,".html",sep=""))
421 ans2 <- ans[,1:7]
422 link <- cbind(matrix(NA,nrow(ans2),1),rbind(NA,hyper),matrix(NA,nrow(ans2),3),rbind(NA,hyper1),matrix(NA,nrow(ans2),1))
423 # This block is responsible to add as many columns as the user
424 # wants
425 if(!is.null(addLink)){
426 for(l in 1:length(addLink$link)) {
427 link <- cbind(link, rbind(NA, addLink[[l]]))
428 }
429 for(a in 1:length(addLink$ans)) {
430 ans2 <- cbind(ans2,addLink$ans[[a]])
431 #colnames(ans2)[7+a] <- addLink$ans[[a]][1]
432 }
433 }
434 hwrite(ans2, p,row.bgcolor='#ffdc98', link=link )
435 closePage(p)
436 if(!is.null(molIon$cameraobj)) {
437 plotEIC(molIon$cameraobj@xcmsSet, figidx, ans[ans[,6]!="",6][-1], colorplot=colorplot)
438 }
439 else {
440 dataidxP <- as.numeric(allidx[allidx[,2]!="neg",1])
441 pngidxP <- allidx[allidx[,2]!="neg",3]
442 plotEIC(molIon$pos@xcmsSet, dataidxP, pngidxP, "pos", colorplot=colorplot)
443 dataidxN <- as.numeric(allidx[allidx[,2]=="neg",1])
444 pngidxN <- allidx[allidx[,2]=="neg",3]
445 plotEIC(molIon$neg@xcmsSet, dataidxN, pngidxN, "neg", colorplot=colorplot)
446 }
447
448 }
449 else {
450 ansb <- ans
451 }
452 colnames(ansb) <- ansb[1,]
453 ansb <- ansb[-1,]
454 return(list(classTable=ansb, figidx=figidx))
455 }