comparison lib.r @ 16:a2c49996603e draft

planemo upload commit e440674afa3e58c46100b0ac7c305a6f46ecbbdc
author lecorguille
date Wed, 19 Sep 2018 03:22:21 -0400
parents 1c30ff90f3ae
children 73d82de36369
comparison
equal deleted inserted replaced
15:6139bfcc95cb 16:a2c49996603e
1 # lib.r 1 # lib.r
2
3 # This function retrieve a xset like object
4 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
5 getxcmsSetObject <- function(xobject) {
6 # XCMS 1.x
7 if (class(xobject) == "xcmsSet")
8 return (xobject)
9 # XCMS 3.x
10 if (class(xobject) == "XCMSnExp") {
11 # Get the legacy xcmsSet object
12 suppressWarnings(xset <- as(xobject, 'xcmsSet'))
13 if (is.null(xset@phenoData$sample_group))
14 sampclass(xset) = "."
15 else
16 sampclass(xset) <- xset@phenoData$sample_group
17 if (!is.null(xset@phenoData$sample_name))
18 rownames(xset@phenoData) = xset@phenoData$sample_name
19 return (xset)
20 }
21 }
2 22
3 #@author G. Le Corguille 23 #@author G. Le Corguille
4 #The function create a pdf from the different png generated by diffreport 24 #The function create a pdf from the different png generated by diffreport
5 diffreport_png2pdf <- function(filebase) { 25 diffreport_png2pdf <- function(filebase) {
6 dir.create("pdf") 26 dir.create("pdf")
103 for (i in seq(along=x) ) { 123 for (i in seq(along=x) ) {
104 y=1:(length(classes)) 124 y=1:(length(classes))
105 for (n in seq(along=y)){ 125 for (n in seq(along=y)){
106 if(i+n <= length(classes)){ 126 if(i+n <= length(classes)){
107 filebase=paste(classes[i],class2=classes[i+n],sep="-vs-") 127 filebase=paste(classes[i],class2=classes[i+n],sep="-vs-")
108 128
109 diffrep=diffreport(object=xset,class1=classes[i],class2=classes[i+n],filebase=filebase,eicmax=listArguments[["eicmax"]],eicwidth=listArguments[["eicwidth"]],sortpval=TRUE,value=listArguments[["value"]],h=listArguments[["h"]],w=listArguments[["w"]],mzdec=listArguments[["mzdec"]]) 129 diffrep=diffreport(object=xset,class1=classes[i],class2=classes[i+n],filebase=filebase,eicmax=listArguments[["eicmax"]],eicwidth=listArguments[["eicwidth"]],sortpval=TRUE,value=listArguments[["value"]],h=listArguments[["h"]],w=listArguments[["w"]],mzdec=listArguments[["mzdec"]],missing=0)
110 130
111 diffrepOri = diffrep 131 diffrepOri = diffrep
112 132
113 # renamming of the column rtmed to rt to fit with camera peaklist function output 133 # renamming of the column rtmed to rt to fit with camera peaklist function output
114 colnames(diffrep)[colnames(diffrep)=="rtmed"] <- "rt" 134 colnames(diffrep)[colnames(diffrep)=="rtmed"] <- "rt"
115 colnames(diffrep)[colnames(diffrep)=="mzmed"] <- "mz" 135 colnames(diffrep)[colnames(diffrep)=="mzmed"] <- "mz"
116 136
117 # combines results and reorder columns 137 # combines results and reorder columns
118 diffrep = merge(peakList, diffrep[,c("name","fold","tstat","pvalue")], by.x="name", by.y="name", sort=F) 138 diffrep = merge(peakList, diffrep[,c("name","fold","tstat","pvalue")], by.x="name", by.y="name", sort=F)
119 diffrep = cbind(diffrep[,!(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))],diffrep[,(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))]) 139 diffrep = cbind(diffrep[,!(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))],diffrep[,(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))])
120 140
121 diffrep = RTSecondToMinute(diffrep, listArguments[["convertRTMinute"]]) 141 diffrep = RTSecondToMinute(diffrep, listArguments[["convertRTMinute"]])
122 diffrep = formatIonIdentifiers(diffrep, numDigitsRT=listArguments[["numDigitsRT"]], numDigitsMZ=listArguments[["numDigitsMZ"]]) 142 diffrep = formatIonIdentifiers(diffrep, numDigitsRT=listArguments[["numDigitsRT"]], numDigitsMZ=listArguments[["numDigitsMZ"]])
123 143
124 if(listArguments[["sortpval"]]){ 144 if(listArguments[["sortpval"]]){
125 diffrep=diffrep[order(diffrep$pvalue), ] 145 diffrep=diffrep[order(diffrep$pvalue), ]
126 } 146 }
127 147
128 dir.create("tabular") 148 dir.create("tabular")
129 write.table(diffrep, sep="\t", quote=FALSE, row.names=FALSE, file=paste("tabular/",filebase,"_tsv.tabular",sep="")) 149 write.table(diffrep, sep="\t", quote=FALSE, row.names=FALSE, file=paste("tabular/",filebase,"_tsv.tabular",sep=""))
130 150
131 if (listArguments[["eicmax"]] != 0) { 151 if (listArguments[["eicmax"]] != 0) {
132 diffreport_png2pdf(filebase) 152 diffreport_png2pdf(filebase)
133 } 153 }
134 } 154 }
135 } 155 }
136 } 156 }
137 } 157 }
138
139 158
140 # --- variableMetadata --- 159 # --- variableMetadata ---
141 variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))] 160 variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))]
142 variableMetadata = RTSecondToMinute(variableMetadata, listArguments[["convertRTMinute"]]) 161 variableMetadata = RTSecondToMinute(variableMetadata, listArguments[["convertRTMinute"]])
143 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=listArguments[["numDigitsRT"]], numDigitsMZ=listArguments[["numDigitsMZ"]]) 162 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=listArguments[["numDigitsRT"]], numDigitsMZ=listArguments[["numDigitsMZ"]])
300 cat("files_root_directory\t",directory,"\n") 319 cat("files_root_directory\t",directory,"\n")
301 320
302 } 321 }
303 return (directory) 322 return (directory)
304 } 323 }
324
325 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
326 # https://github.com/sneumann/CAMERA/issues/33#issuecomment-405168524
327 # https://github.com/sneumann/xcms/commit/950a3fe794cdb6b0fda88696e31aab3d97a3b7dd
328 ############################################################
329 ## getEIC
330 getEIC <- function(object, mzrange, rtrange = 200,
331 groupidx, sampleidx = sampnames(object),
332 rt = c("corrected", "raw")) {
333
334 files <- filepaths(object)
335 grp <- groups(object)
336 samp <- sampnames(object)
337 prof <- profinfo(object)
338
339 rt <- match.arg(rt)
340
341 if (is.numeric(sampleidx))
342 sampleidx <- sampnames(object)[sampleidx]
343 sampidx <- match(sampleidx, sampnames(object))
344
345 if (!missing(groupidx)) {
346 if (is.numeric(groupidx))
347 groupidx <- groupnames(object)[unique(as.integer(groupidx))]
348 grpidx <- match(groupidx, groupnames(object, template = groupidx))
349 }
350
351 if (missing(mzrange)) {
352 if (missing(groupidx))
353 stop("No m/z range or groups specified")
354 if (any(is.na(groupval(object, value = "mz"))))
355 warning(
356 "`NA` values in xcmsSet. Use fillPeaks() on the object to fill",
357 "-in missing peak values. Note however that this will also ",
358 "insert intensities of 0 for peaks that can not be filled in.")
359 mzmin <- apply(groupval(object, value = "mzmin"), 1, min, na.rm = TRUE)
360 mzmax <- apply(groupval(object, value = "mzmax"), 1, max, na.rm = TRUE)
361 mzrange <- matrix(c(mzmin[grpidx], mzmax[grpidx]), ncol = 2)
362 ## if (any(is.na(groupval(object, value = "mz"))))
363 ## stop('Please use fillPeaks() to fill up NA values !')
364 ## mzmin <- -rowMax(-groupval(object, value = "mzmin"))
365 ## mzmax <- rowMax(groupval(object, value = "mzmax"))
366 ## mzrange <- matrix(c(mzmin[grpidx], mzmax[grpidx]), ncol = 2)
367 } else if (all(c("mzmin","mzmax") %in% colnames(mzrange)))
368 mzrange <- mzrange[,c("mzmin", "mzmax"),drop=FALSE]
369 else if (is.null(dim(mzrange)))
370 stop("mzrange must be a matrix")
371 colnames(mzrange) <- c("mzmin", "mzmax")
372
373 if (length(rtrange) == 1) {
374 if (missing(groupidx))
375 rtrange <- matrix(rep(range(object@rt[[rt]][sampidx]), nrow(mzrange)),
376 ncol = 2, byrow = TRUE)
377 else {
378 rtrange <- retexp(grp[grpidx,c("rtmin","rtmax"),drop=FALSE], rtrange)
379 }
380 } else if (is.null(dim(rtrange)))
381 stop("rtrange must be a matrix or single number")
382 colnames(rtrange) <- c("rtmin", "rtmax")
383
384 ## Ensure that we've got corrected retention time if requested.
385 if (is.null(object@rt[[rt]]))
386 stop(rt, " retention times not present in 'object'!")
387
388 ## Ensure that the defined retention time range is within the rtrange of the
389 ## object: we're using the max minimal rt of all files and the min maximal rt
390 rtrs <- lapply(object@rt[[rt]], range)
391 rtr <- c(max(unlist(lapply(rtrs, "[", 1))),
392 min(unlist(lapply(rtrs, "[", 2))))
393 ## Check if we've got a range which is completely off:
394 if (any(rtrange[, "rtmin"] >= rtr[2] | rtrange[, "rtmax"] <= rtr[1])) {
395 outs <- which(rtrange[, "rtmin"] >= rtr[2] |
396 rtrange[, "rtmax"] <= rtr[1])
397 stop(length(outs), " of the specified 'rtrange' are completely outside ",
398 "of the retention time range of 'object' which is (", rtr[1], ", ",
399 rtr[2], "). The first was: (", rtrange[outs[1], "rtmin"], ", ",
400 rtrange[outs[1], "rtmax"], "!")
401 }
402 lower_rt_outside <- rtrange[, "rtmin"] < rtr[1]
403 upper_rt_outside <- rtrange[, "rtmax"] > rtr[2]
404 if (any(lower_rt_outside) | any(upper_rt_outside)) {
405 ## Silently fix these ranges.
406 rtrange[lower_rt_outside, "rtmin"] <- rtr[1]
407 rtrange[upper_rt_outside, "rtmax"] <- rtr[2]
408 }
409
410 if (missing(groupidx))
411 gnames <- character(0)
412 else
413 gnames <- groupidx
414
415 eic <- vector("list", length(sampleidx))
416 names(eic) <- sampleidx
417
418 for (i in seq(along = sampidx)) {
419
420 ## cat(sampleidx[i], "")
421 flush.console()
422 ## getXcmsRaw takes care of rt correction, susetting to scanrage and other
423 ## stuff.
424 lcraw <- getXcmsRaw(object, sampleidx = sampidx[i], rt=rt)
425 currenteic <- xcms::getEIC(lcraw, mzrange, rtrange, step = prof$step)
426 eic[[i]] <- currenteic@eic[[1]]
427 rm(lcraw)
428 gc()
429 }
430 ## cat("\n")
431
432 invisible(new("xcmsEIC", eic = eic, mzrange = mzrange, rtrange = rtrange,
433 rt = rt, groupnames = gnames))
434 }
435
436 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
437 # https://github.com/sneumann/CAMERA/issues/33#issuecomment-405168524
438 # https://github.com/sneumann/xcms/commit/950a3fe794cdb6b0fda88696e31aab3d97a3b7dd
439 ############################################################
440 ## diffreport
441 diffreport = function(object,
442 class1 = levels(sampclass(object))[1],
443 class2 = levels(sampclass(object))[2],
444 filebase = character(),
445 eicmax = 0, eicwidth = 200,
446 sortpval = TRUE,
447 classeic = c(class1,class2),
448 value = c("into","maxo","intb"),
449 metlin = FALSE,
450 h = 480, w = 640, mzdec=2,
451 missing = numeric(), ...) {
452
453 if ( nrow(object@groups)<1 || length(object@groupidx) <1) {
454 stop("No group information. Use group().")
455 }
456
457 if (!is.numeric(w) || !is.numeric(h))
458 stop("'h' and 'w' have to be numeric")
459 ## require(multtest) || stop("Couldn't load multtest")
460
461 value <- match.arg(value)
462 groupmat <- groups(object)
463 if (length(groupmat) == 0)
464 stop("No group information found")
465 samples <- sampnames(object)
466 n <- length(samples)
467 classlabel <- sampclass(object)
468 classlabel <- levels(classlabel)[as.vector(unclass(classlabel))]
469
470 values <- groupval(object, "medret", value=value)
471 indecies <- groupval(object, "medret", value = "index")
472
473 if (!all(c(class1,class2) %in% classlabel))
474 stop("Incorrect Class Labels")
475
476 ## c1 and c2 are column indices of class1 and class2 resp.
477 c1 <- which(classlabel %in% class1)
478 c2 <- which(classlabel %in% class2)
479 ceic <- which(classlabel %in% classeic)
480 if (length(intersect(c1, c2)) > 0)
481 stop("Intersecting Classes")
482
483 ## Optionally replace NA values with the value provided with missing
484 if (length(missing)) {
485 if (is.numeric(missing)) {
486 ## handles NA, Inf and -Inf
487 values[, c(c1, c2)][!is.finite(values[, c(c1, c2)])] <- missing[1]
488 } else
489 stop("'missing' should be numeric")
490 }
491 ## Check against missing Values
492 if (any(is.na(values[, c(c1, c2)])))
493 warning("`NA` values in xcmsSet. Use fillPeaks() on the object to fill",
494 "-in missing peak values. Note however that this will also ",
495 "insert intensities of 0 for peaks that can not be filled in.")
496
497 mean1 <- rowMeans(values[,c1,drop=FALSE], na.rm = TRUE)
498 mean2 <- rowMeans(values[,c2,drop=FALSE], na.rm = TRUE)
499
500 ## Calculate fold change.
501 ## For foldchange <1 set fold to 1/fold
502 ## See tstat to check which was higher
503 fold <- mean2 / mean1
504 fold[!is.na(fold) & fold < 1] <- 1/fold[!is.na(fold) & fold < 1]
505
506 testval <- values[,c(c1,c2)]
507 ## Replace eventual infinite values with NA (CAMERA issue #33)
508 testval[is.infinite(testval)] <- NA
509 testclab <- c(rep(0,length(c1)),rep(1,length(c2)))
510
511 if (min(length(c1), length(c2)) >= 2) {
512 tstat <- mt.teststat(testval, testclab, ...)
513 pvalue <- xcms:::pval(testval, testclab, tstat)
514 } else {
515 message("Too few samples per class, skipping t-test.")
516 tstat <- pvalue <- rep(NA,nrow(testval))
517 }
518 stat <- data.frame(fold = fold, tstat = tstat, pvalue = pvalue)
519 if (length(levels(sampclass(object))) >2) {
520 pvalAnova<-c()
521 for(i in 1:nrow(values)){
522 var<-as.numeric(values[i,])
523 ano<-summary(aov(var ~ sampclass(object)) )
524 pvalAnova<-append(pvalAnova, unlist(ano)["Pr(>F)1"])
525 }
526 stat<-cbind(stat, anova= pvalAnova)
527 }
528 if (metlin) {
529 neutralmass <- groupmat[,"mzmed"] + ifelse(metlin < 0, 1, -1)
530 metlin <- abs(metlin)
531 digits <- ceiling(-log10(metlin))+1
532 metlinurl <-
533 paste("http://metlin.scripps.edu/simple_search_result.php?mass_min=",
534 round(neutralmass - metlin, digits), "&mass_max=",
535 round(neutralmass + metlin, digits), sep="")
536 values <- cbind(metlin = metlinurl, values)
537 }
538 twosamp <- cbind(name = groupnames(object), stat, groupmat, values)
539 if (sortpval) {
540 tsidx <- order(twosamp[,"pvalue"])
541 twosamp <- twosamp[tsidx,]
542 rownames(twosamp) <- 1:nrow(twosamp)
543 values<-values[tsidx,]
544 } else
545 tsidx <- 1:nrow(values)
546
547 if (length(filebase))
548 write.table(twosamp, paste(filebase, ".tsv", sep = ""), quote = FALSE, sep = "\t", col.names = NA)
549
550 if (eicmax > 0) {
551 if (length(unique(peaks(object)[,"rt"])) > 1) {
552 ## This looks like "normal" LC data
553
554 eicmax <- min(eicmax, length(tsidx))
555 eics <- getEIC(object, rtrange = eicwidth*1.1, sampleidx = ceic,
556 groupidx = tsidx[seq(length = eicmax)])
557
558 if (length(filebase)) {
559 eicdir <- paste(filebase, "_eic", sep="")
560 boxdir <- paste(filebase, "_box", sep="")
561 dir.create(eicdir)
562 dir.create(boxdir)
563 if (capabilities("png")){
564 xcms:::xcmsBoxPlot(values[seq(length = eicmax),],
565 sampclass(object), dirpath=boxdir, pic="png", width=w, height=h)
566 png(file.path(eicdir, "%003d.png"), width = w, height = h)
567 } else {
568 xcms:::xcmsBoxPlot(values[seq(length = eicmax),],
569 sampclass(object), dirpath=boxdir, pic="pdf", width=w, height=h)
570 pdf(file.path(eicdir, "%003d.pdf"), width = w/72,
571 height = h/72, onefile = FALSE)
572 }
573 }
574 plot(eics, object, rtrange = eicwidth, mzdec=mzdec)
575
576 if (length(filebase))
577 dev.off()
578 } else {
579 ## This looks like a direct-infusion single spectrum
580 if (length(filebase)) {
581 eicdir <- paste(filebase, "_eic", sep="")
582 boxdir <- paste(filebase, "_box", sep="")
583 dir.create(eicdir)
584 dir.create(boxdir)
585 if (capabilities("png")){
586 xcmsBoxPlot(values[seq(length = eicmax),],
587 sampclass(object), dirpath=boxdir, pic="png",
588 width=w, height=h)
589 png(file.path(eicdir, "%003d.png"), width = w, height = h,
590 units = "px")
591 } else {
592 xcmsBoxPlot(values[seq(length = eicmax),],
593 sampclass(object), dirpath=boxdir, pic="pdf",
594 width=w, height=h)
595 pdf(file.path(eicdir, "%003d.pdf"), width = w/72,
596 height = h/72, onefile = FALSE)
597 }
598 }
599
600 plotSpecWindow(object, gidxs = tsidx[seq(length = eicmax)], borderwidth=1)
601
602 if (length(filebase))
603 dev.off()
604 }
605 }
606
607 invisible(twosamp)
608 }