comparison lib.r @ 19:01459b73daf9 draft

"planemo upload commit 4fcbbcbc6d6b0a59e801870d31fe886a920ef429"
author workflow4metabolomics
date Thu, 13 Feb 2020 17:23:07 -0500
parents cb923396e70f
children b979ba5888f7
comparison
equal deleted inserted replaced
18:cb923396e70f 19:01459b73daf9
364 364
365 } 365 }
366 return (directory) 366 return (directory)
367 } 367 }
368 368
369 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
370 # https://github.com/sneumann/CAMERA/issues/33#issuecomment-405168524
371 # https://github.com/sneumann/xcms/commit/950a3fe794cdb6b0fda88696e31aab3d97a3b7dd
372 ############################################################
373 ## getEIC
374 getEIC <- function(object, mzrange, rtrange = 200,
375 groupidx, sampleidx = sampnames(object),
376 rt = c("corrected", "raw")) {
377
378 files <- filepaths(object)
379 grp <- groups(object)
380 samp <- sampnames(object)
381 prof <- profinfo(object)
382
383 rt <- match.arg(rt)
384
385 if (is.numeric(sampleidx))
386 sampleidx <- sampnames(object)[sampleidx]
387 sampidx <- match(sampleidx, sampnames(object))
388
389 if (!missing(groupidx)) {
390 if (is.numeric(groupidx))
391 groupidx <- groupnames(object)[unique(as.integer(groupidx))]
392 grpidx <- match(groupidx, groupnames(object, template = groupidx))
393 }
394
395 if (missing(mzrange)) {
396 if (missing(groupidx))
397 stop("No m/z range or groups specified")
398 if (any(is.na(groupval(object, value = "mz"))))
399 warning(
400 "`NA` values in xcmsSet. Use fillPeaks() on the object to fill",
401 "-in missing peak values. Note however that this will also ",
402 "insert intensities of 0 for peaks that can not be filled in.")
403 mzmin <- apply(groupval(object, value = "mzmin"), 1, min, na.rm = TRUE)
404 mzmax <- apply(groupval(object, value = "mzmax"), 1, max, na.rm = TRUE)
405 mzrange <- matrix(c(mzmin[grpidx], mzmax[grpidx]), ncol = 2)
406 ## if (any(is.na(groupval(object, value = "mz"))))
407 ## stop('Please use fillPeaks() to fill up NA values !')
408 ## mzmin <- -rowMax(-groupval(object, value = "mzmin"))
409 ## mzmax <- rowMax(groupval(object, value = "mzmax"))
410 ## mzrange <- matrix(c(mzmin[grpidx], mzmax[grpidx]), ncol = 2)
411 } else if (all(c("mzmin","mzmax") %in% colnames(mzrange)))
412 mzrange <- mzrange[,c("mzmin", "mzmax"),drop=FALSE]
413 else if (is.null(dim(mzrange)))
414 stop("mzrange must be a matrix")
415 colnames(mzrange) <- c("mzmin", "mzmax")
416
417 if (length(rtrange) == 1) {
418 if (missing(groupidx))
419 rtrange <- matrix(rep(range(object@rt[[rt]][sampidx]), nrow(mzrange)),
420 ncol = 2, byrow = TRUE)
421 else {
422 rtrange <- retexp(grp[grpidx,c("rtmin","rtmax"),drop=FALSE], rtrange)
423 }
424 } else if (is.null(dim(rtrange)))
425 stop("rtrange must be a matrix or single number")
426 colnames(rtrange) <- c("rtmin", "rtmax")
427
428 ## Ensure that we've got corrected retention time if requested.
429 if (is.null(object@rt[[rt]]))
430 stop(rt, " retention times not present in 'object'!")
431
432 ## Ensure that the defined retention time range is within the rtrange of the
433 ## object: we're using the max minimal rt of all files and the min maximal rt
434 rtrs <- lapply(object@rt[[rt]], range)
435 rtr <- c(max(unlist(lapply(rtrs, "[", 1))),
436 min(unlist(lapply(rtrs, "[", 2))))
437 ## Check if we've got a range which is completely off:
438 if (any(rtrange[, "rtmin"] >= rtr[2] | rtrange[, "rtmax"] <= rtr[1])) {
439 outs <- which(rtrange[, "rtmin"] >= rtr[2] |
440 rtrange[, "rtmax"] <= rtr[1])
441 stop(length(outs), " of the specified 'rtrange' are completely outside ",
442 "of the retention time range of 'object' which is (", rtr[1], ", ",
443 rtr[2], "). The first was: (", rtrange[outs[1], "rtmin"], ", ",
444 rtrange[outs[1], "rtmax"], "!")
445 }
446 lower_rt_outside <- rtrange[, "rtmin"] < rtr[1]
447 upper_rt_outside <- rtrange[, "rtmax"] > rtr[2]
448 if (any(lower_rt_outside) | any(upper_rt_outside)) {
449 ## Silently fix these ranges.
450 rtrange[lower_rt_outside, "rtmin"] <- rtr[1]
451 rtrange[upper_rt_outside, "rtmax"] <- rtr[2]
452 }
453
454 if (missing(groupidx))
455 gnames <- character(0)
456 else
457 gnames <- groupidx
458
459 eic <- vector("list", length(sampleidx))
460 names(eic) <- sampleidx
461
462 for (i in seq(along = sampidx)) {
463
464 ## cat(sampleidx[i], "")
465 flush.console()
466 ## getXcmsRaw takes care of rt correction, susetting to scanrage and other
467 ## stuff.
468 lcraw <- getXcmsRaw(object, sampleidx = sampidx[i], rt=rt)
469 currenteic <- xcms::getEIC(lcraw, mzrange, rtrange, step = prof$step)
470 eic[[i]] <- currenteic@eic[[1]]
471 rm(lcraw)
472 gc()
473 }
474 ## cat("\n")
475
476 invisible(new("xcmsEIC", eic = eic, mzrange = mzrange, rtrange = rtrange,
477 rt = rt, groupnames = gnames))
478 }
479
480 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
481 # https://github.com/sneumann/CAMERA/issues/33#issuecomment-405168524
482 # https://github.com/sneumann/xcms/commit/950a3fe794cdb6b0fda88696e31aab3d97a3b7dd
483 ############################################################
484 ## diffreport
485 diffreport = function(object,
486 class1 = levels(sampclass(object))[1],
487 class2 = levels(sampclass(object))[2],
488 filebase = character(),
489 eicmax = 0, eicwidth = 200,
490 sortpval = TRUE,
491 classeic = c(class1,class2),
492 value = c("into","maxo","intb"),
493 metlin = FALSE,
494 h = 480, w = 640, mzdec=2,
495 missing = numeric(), ...) {
496
497 if ( nrow(object@groups)<1 || length(object@groupidx) <1) {
498 stop("No group information. Use group().")
499 }
500
501 if (!is.numeric(w) || !is.numeric(h))
502 stop("'h' and 'w' have to be numeric")
503 ## require(multtest) || stop("Couldn't load multtest")
504
505 value <- match.arg(value)
506 groupmat <- groups(object)
507 if (length(groupmat) == 0)
508 stop("No group information found")
509 samples <- sampnames(object)
510 n <- length(samples)
511 classlabel <- sampclass(object)
512 classlabel <- levels(classlabel)[as.vector(unclass(classlabel))]
513
514 values <- groupval(object, "medret", value=value)
515 indecies <- groupval(object, "medret", value = "index")
516
517 if (!all(c(class1,class2) %in% classlabel))
518 stop("Incorrect Class Labels")
519
520 ## c1 and c2 are column indices of class1 and class2 resp.
521 c1 <- which(classlabel %in% class1)
522 c2 <- which(classlabel %in% class2)
523 ceic <- which(classlabel %in% classeic)
524 if (length(intersect(c1, c2)) > 0)
525 stop("Intersecting Classes")
526
527 ## Optionally replace NA values with the value provided with missing
528 if (length(missing)) {
529 if (is.numeric(missing)) {
530 ## handles NA, Inf and -Inf
531 values[, c(c1, c2)][!is.finite(values[, c(c1, c2)])] <- missing[1]
532 } else
533 stop("'missing' should be numeric")
534 }
535 ## Check against missing Values
536 if (any(is.na(values[, c(c1, c2)])))
537 warning("`NA` values in xcmsSet. Use fillPeaks() on the object to fill",
538 "-in missing peak values. Note however that this will also ",
539 "insert intensities of 0 for peaks that can not be filled in.")
540
541 mean1 <- rowMeans(values[,c1,drop=FALSE], na.rm = TRUE)
542 mean2 <- rowMeans(values[,c2,drop=FALSE], na.rm = TRUE)
543
544 ## Calculate fold change.
545 ## For foldchange <1 set fold to 1/fold
546 ## See tstat to check which was higher
547 fold <- mean2 / mean1
548 fold[!is.na(fold) & fold < 1] <- 1/fold[!is.na(fold) & fold < 1]
549
550 testval <- values[,c(c1,c2)]
551 ## Replace eventual infinite values with NA (CAMERA issue #33)
552 testval[is.infinite(testval)] <- NA
553 testclab <- c(rep(0,length(c1)),rep(1,length(c2)))
554
555 if (min(length(c1), length(c2)) >= 2) {
556 tstat <- mt.teststat(testval, testclab, ...)
557 pvalue <- xcms:::pval(testval, testclab, tstat)
558 } else {
559 message("Too few samples per class, skipping t-test.")
560 tstat <- pvalue <- rep(NA,nrow(testval))
561 }
562 stat <- data.frame(fold = fold, tstat = tstat, pvalue = pvalue)
563 if (length(levels(sampclass(object))) >2) {
564 pvalAnova<-c()
565 for(i in 1:nrow(values)){
566 var<-as.numeric(values[i,])
567 ano<-summary(aov(var ~ sampclass(object)) )
568 pvalAnova<-append(pvalAnova, unlist(ano)["Pr(>F)1"])
569 }
570 stat<-cbind(stat, anova= pvalAnova)
571 }
572 if (metlin) {
573 neutralmass <- groupmat[,"mzmed"] + ifelse(metlin < 0, 1, -1)
574 metlin <- abs(metlin)
575 digits <- ceiling(-log10(metlin))+1
576 metlinurl <-
577 paste("http://metlin.scripps.edu/simple_search_result.php?mass_min=",
578 round(neutralmass - metlin, digits), "&mass_max=",
579 round(neutralmass + metlin, digits), sep="")
580 values <- cbind(metlin = metlinurl, values)
581 }
582 twosamp <- cbind(name = groupnames(object), stat, groupmat, values)
583 if (sortpval) {
584 tsidx <- order(twosamp[,"pvalue"])
585 twosamp <- twosamp[tsidx,]
586 rownames(twosamp) <- 1:nrow(twosamp)
587 values<-values[tsidx,]
588 } else
589 tsidx <- 1:nrow(values)
590
591 if (length(filebase))
592 write.table(twosamp, paste(filebase, ".tsv", sep = ""), quote = FALSE, sep = "\t", col.names = NA)
593
594 if (eicmax > 0) {
595 if (length(unique(peaks(object)[,"rt"])) > 1) {
596 ## This looks like "normal" LC data
597
598 eicmax <- min(eicmax, length(tsidx))
599 eics <- getEIC(object, rtrange = eicwidth*1.1, sampleidx = ceic,
600 groupidx = tsidx[seq(length = eicmax)])
601
602 if (length(filebase)) {
603 eicdir <- paste(filebase, "_eic", sep="")
604 boxdir <- paste(filebase, "_box", sep="")
605 dir.create(eicdir)
606 dir.create(boxdir)
607 if (capabilities("png")){
608 xcms:::xcmsBoxPlot(values[seq(length = eicmax),],
609 sampclass(object), dirpath=boxdir, pic="png", width=w, height=h)
610 png(file.path(eicdir, "%003d.png"), width = w, height = h)
611 } else {
612 xcms:::xcmsBoxPlot(values[seq(length = eicmax),],
613 sampclass(object), dirpath=boxdir, pic="pdf", width=w, height=h)
614 pdf(file.path(eicdir, "%003d.pdf"), width = w/72,
615 height = h/72, onefile = FALSE)
616 }
617 }
618 plot(eics, object, rtrange = eicwidth, mzdec=mzdec)
619
620 if (length(filebase))
621 dev.off()
622 } else {
623 ## This looks like a direct-infusion single spectrum
624 if (length(filebase)) {
625 eicdir <- paste(filebase, "_eic", sep="")
626 boxdir <- paste(filebase, "_box", sep="")
627 dir.create(eicdir)
628 dir.create(boxdir)
629 if (capabilities("png")){
630 xcmsBoxPlot(values[seq(length = eicmax),],
631 sampclass(object), dirpath=boxdir, pic="png",
632 width=w, height=h)
633 png(file.path(eicdir, "%003d.png"), width = w, height = h,
634 units = "px")
635 } else {
636 xcmsBoxPlot(values[seq(length = eicmax),],
637 sampclass(object), dirpath=boxdir, pic="pdf",
638 width=w, height=h)
639 pdf(file.path(eicdir, "%003d.pdf"), width = w/72,
640 height = h/72, onefile = FALSE)
641 }
642 }
643
644 plotSpecWindow(object, gidxs = tsidx[seq(length = eicmax)], borderwidth=1)
645
646 if (length(filebase))
647 dev.off()
648 }
649 }
650
651 invisible(twosamp)
652 }