diff lib.r @ 16:a2c49996603e draft

planemo upload commit e440674afa3e58c46100b0ac7c305a6f46ecbbdc
author lecorguille
date Wed, 19 Sep 2018 03:22:21 -0400
parents 1c30ff90f3ae
children 73d82de36369
line wrap: on
line diff
--- a/lib.r	Thu Mar 01 08:29:32 2018 -0500
+++ b/lib.r	Wed Sep 19 03:22:21 2018 -0400
@@ -1,5 +1,25 @@
 # lib.r
 
+# This function retrieve a xset like object
+#@author Gildas Le Corguille lecorguille@sb-roscoff.fr
+getxcmsSetObject <- function(xobject) {
+    # XCMS 1.x
+    if (class(xobject) == "xcmsSet")
+        return (xobject)
+    # XCMS 3.x
+    if (class(xobject) == "XCMSnExp") {
+        # Get the legacy xcmsSet object
+        suppressWarnings(xset <- as(xobject, 'xcmsSet'))
+        if (is.null(xset@phenoData$sample_group))
+            sampclass(xset) = "."
+        else
+            sampclass(xset) <- xset@phenoData$sample_group
+        if (!is.null(xset@phenoData$sample_name))
+            rownames(xset@phenoData) = xset@phenoData$sample_name
+        return (xset)
+    }
+}
+
 #@author G. Le Corguille
 #The function create a pdf from the different png generated by diffreport
 diffreport_png2pdf <- function(filebase) {
@@ -105,29 +125,29 @@
             for (n in seq(along=y)){
                 if(i+n <= length(classes)){
                     filebase=paste(classes[i],class2=classes[i+n],sep="-vs-")
-                    
-                    diffrep=diffreport(object=xset,class1=classes[i],class2=classes[i+n],filebase=filebase,eicmax=listArguments[["eicmax"]],eicwidth=listArguments[["eicwidth"]],sortpval=TRUE,value=listArguments[["value"]],h=listArguments[["h"]],w=listArguments[["w"]],mzdec=listArguments[["mzdec"]])
-                    
+
+                    diffrep=diffreport(object=xset,class1=classes[i],class2=classes[i+n],filebase=filebase,eicmax=listArguments[["eicmax"]],eicwidth=listArguments[["eicwidth"]],sortpval=TRUE,value=listArguments[["value"]],h=listArguments[["h"]],w=listArguments[["w"]],mzdec=listArguments[["mzdec"]],missing=0)
+
                     diffrepOri = diffrep
-                    
+
                     # renamming of the column rtmed to rt to fit with camera peaklist function output
                     colnames(diffrep)[colnames(diffrep)=="rtmed"] <- "rt"
                     colnames(diffrep)[colnames(diffrep)=="mzmed"] <- "mz"
-                    
+
                     # combines results and reorder columns
                     diffrep = merge(peakList, diffrep[,c("name","fold","tstat","pvalue")], by.x="name", by.y="name", sort=F)
                     diffrep = cbind(diffrep[,!(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))],diffrep[,(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))])
-                    
+
                     diffrep = RTSecondToMinute(diffrep, listArguments[["convertRTMinute"]])
                     diffrep = formatIonIdentifiers(diffrep, numDigitsRT=listArguments[["numDigitsRT"]], numDigitsMZ=listArguments[["numDigitsMZ"]])
-                    
+
                     if(listArguments[["sortpval"]]){
                         diffrep=diffrep[order(diffrep$pvalue), ]
                     }
-                    
+
                     dir.create("tabular")
                     write.table(diffrep, sep="\t", quote=FALSE, row.names=FALSE, file=paste("tabular/",filebase,"_tsv.tabular",sep=""))
-                    
+
                     if (listArguments[["eicmax"]] != 0) {
                         diffreport_png2pdf(filebase)
                     }
@@ -136,7 +156,6 @@
         }
     }
 
-
     # --- variableMetadata ---
     variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))]
     variableMetadata = RTSecondToMinute(variableMetadata, listArguments[["convertRTMinute"]])
@@ -302,3 +321,288 @@
     }
     return (directory)
 }
+
+#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
+# https://github.com/sneumann/CAMERA/issues/33#issuecomment-405168524
+# https://github.com/sneumann/xcms/commit/950a3fe794cdb6b0fda88696e31aab3d97a3b7dd
+############################################################
+## getEIC
+getEIC <- function(object, mzrange, rtrange = 200,
+                   groupidx, sampleidx = sampnames(object),
+                   rt = c("corrected", "raw")) {
+    
+    files <- filepaths(object)
+    grp <- groups(object)
+    samp <- sampnames(object)
+    prof <- profinfo(object)
+    
+    rt <- match.arg(rt)
+    
+    if (is.numeric(sampleidx))
+    sampleidx <- sampnames(object)[sampleidx]
+    sampidx <- match(sampleidx, sampnames(object))
+    
+    if (!missing(groupidx)) {
+        if (is.numeric(groupidx))
+        groupidx <- groupnames(object)[unique(as.integer(groupidx))]
+        grpidx <- match(groupidx, groupnames(object, template = groupidx))
+    }
+    
+    if (missing(mzrange)) {
+        if (missing(groupidx))
+        stop("No m/z range or groups specified")
+        if (any(is.na(groupval(object, value = "mz"))))
+        warning(
+        "`NA` values in xcmsSet. Use fillPeaks() on the object to fill",
+        "-in missing peak values. Note however that this will also ",
+        "insert intensities of 0 for peaks that can not be filled in.")
+        mzmin <- apply(groupval(object, value = "mzmin"), 1, min, na.rm = TRUE)
+        mzmax <- apply(groupval(object, value = "mzmax"), 1, max, na.rm = TRUE)
+        mzrange <- matrix(c(mzmin[grpidx], mzmax[grpidx]), ncol = 2)
+        ## if (any(is.na(groupval(object, value = "mz"))))
+        ##     stop('Please use fillPeaks() to fill up NA values !')
+        ## mzmin <- -rowMax(-groupval(object, value = "mzmin"))
+        ## mzmax <- rowMax(groupval(object, value = "mzmax"))
+        ## mzrange <- matrix(c(mzmin[grpidx], mzmax[grpidx]), ncol = 2)
+    } else if (all(c("mzmin","mzmax") %in% colnames(mzrange)))
+    mzrange <- mzrange[,c("mzmin", "mzmax"),drop=FALSE]
+    else if (is.null(dim(mzrange)))
+    stop("mzrange must be a matrix")
+    colnames(mzrange) <- c("mzmin", "mzmax")
+    
+    if (length(rtrange) == 1) {
+        if (missing(groupidx))
+        rtrange <- matrix(rep(range(object@rt[[rt]][sampidx]), nrow(mzrange)),
+        ncol = 2, byrow = TRUE)
+        else {
+            rtrange <- retexp(grp[grpidx,c("rtmin","rtmax"),drop=FALSE], rtrange)
+        }
+    } else if (is.null(dim(rtrange)))
+    stop("rtrange must be a matrix or single number")
+    colnames(rtrange) <- c("rtmin", "rtmax")
+    
+    ## Ensure that we've got corrected retention time if requested.
+    if (is.null(object@rt[[rt]]))
+    stop(rt, " retention times not present in 'object'!")
+    
+    ## Ensure that the defined retention time range is within the rtrange of the
+    ## object: we're using the max minimal rt of all files and the min maximal rt
+    rtrs <- lapply(object@rt[[rt]], range)
+    rtr <- c(max(unlist(lapply(rtrs, "[", 1))),
+    min(unlist(lapply(rtrs, "[", 2))))
+    ## Check if we've got a range which is completely off:
+    if (any(rtrange[, "rtmin"] >= rtr[2] | rtrange[, "rtmax"] <= rtr[1])) {
+        outs <- which(rtrange[, "rtmin"] >= rtr[2] |
+        rtrange[, "rtmax"] <= rtr[1])
+        stop(length(outs), " of the specified 'rtrange' are completely outside ",
+        "of the retention time range of 'object' which is (", rtr[1], ", ",
+        rtr[2], "). The first was: (", rtrange[outs[1], "rtmin"], ", ",
+        rtrange[outs[1], "rtmax"], "!")
+    }
+    lower_rt_outside <- rtrange[, "rtmin"] < rtr[1]
+    upper_rt_outside <- rtrange[, "rtmax"] > rtr[2]
+    if (any(lower_rt_outside) | any(upper_rt_outside)) {
+        ## Silently fix these ranges.
+        rtrange[lower_rt_outside, "rtmin"] <- rtr[1]
+        rtrange[upper_rt_outside, "rtmax"] <- rtr[2]
+    }
+    
+    if (missing(groupidx))
+    gnames <- character(0)
+    else
+    gnames <- groupidx
+    
+    eic <- vector("list", length(sampleidx))
+    names(eic) <- sampleidx
+    
+    for (i in seq(along = sampidx)) {
+        
+        ## cat(sampleidx[i], "")
+        flush.console()
+        ## getXcmsRaw takes care of rt correction, susetting to scanrage and other
+        ## stuff.
+        lcraw <- getXcmsRaw(object, sampleidx = sampidx[i], rt=rt)
+        currenteic <- xcms::getEIC(lcraw, mzrange, rtrange, step = prof$step)
+        eic[[i]] <- currenteic@eic[[1]]
+        rm(lcraw)
+        gc()
+    }
+    ## cat("\n")
+    
+    invisible(new("xcmsEIC", eic = eic, mzrange = mzrange, rtrange = rtrange,
+    rt = rt, groupnames = gnames))
+}
+
+#@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
+# https://github.com/sneumann/CAMERA/issues/33#issuecomment-405168524
+# https://github.com/sneumann/xcms/commit/950a3fe794cdb6b0fda88696e31aab3d97a3b7dd
+############################################################
+## diffreport
+diffreport = function(object,
+                      class1 = levels(sampclass(object))[1],
+                      class2 = levels(sampclass(object))[2],
+                      filebase = character(),
+                      eicmax = 0, eicwidth = 200,
+                      sortpval = TRUE,
+                      classeic = c(class1,class2),
+                      value = c("into","maxo","intb"),
+                      metlin = FALSE,
+                      h = 480, w = 640, mzdec=2,
+                      missing = numeric(), ...) {
+    
+    if ( nrow(object@groups)<1 || length(object@groupidx) <1) {
+        stop("No group information. Use group().")
+    }
+    
+    if (!is.numeric(w) || !is.numeric(h))
+        stop("'h' and 'w' have to be numeric")
+    ## require(multtest) || stop("Couldn't load multtest")
+    
+    value <- match.arg(value)
+    groupmat <- groups(object)
+    if (length(groupmat) == 0)
+    stop("No group information found")
+    samples <- sampnames(object)
+    n <- length(samples)
+    classlabel <- sampclass(object)
+    classlabel <- levels(classlabel)[as.vector(unclass(classlabel))]
+    
+    values <- groupval(object, "medret", value=value)
+    indecies <- groupval(object, "medret", value = "index")
+    
+    if (!all(c(class1,class2) %in% classlabel))
+    stop("Incorrect Class Labels")
+    
+    ## c1 and c2 are column indices of class1 and class2 resp.
+    c1 <- which(classlabel %in% class1)
+    c2 <- which(classlabel %in% class2)
+    ceic <- which(classlabel %in% classeic)
+    if (length(intersect(c1, c2)) > 0)
+    stop("Intersecting Classes")
+    
+    ## Optionally replace NA values with the value provided with missing
+    if (length(missing)) {
+        if (is.numeric(missing)) {
+            ## handles NA, Inf and -Inf
+            values[, c(c1, c2)][!is.finite(values[, c(c1, c2)])] <- missing[1]
+        } else
+        stop("'missing' should be numeric")
+    }
+    ## Check against missing Values
+    if (any(is.na(values[, c(c1, c2)])))
+    warning("`NA` values in xcmsSet. Use fillPeaks() on the object to fill",
+    "-in missing peak values. Note however that this will also ",
+    "insert intensities of 0 for peaks that can not be filled in.")
+    
+    mean1 <- rowMeans(values[,c1,drop=FALSE], na.rm = TRUE)
+    mean2 <- rowMeans(values[,c2,drop=FALSE], na.rm = TRUE)
+    
+    ## Calculate fold change.
+    ## For foldchange <1 set fold to 1/fold
+    ## See tstat to check which was higher
+    fold <- mean2 / mean1
+    fold[!is.na(fold) & fold < 1] <- 1/fold[!is.na(fold) & fold < 1]
+    
+    testval <- values[,c(c1,c2)]
+    ## Replace eventual infinite values with NA (CAMERA issue #33)
+    testval[is.infinite(testval)] <- NA
+    testclab <- c(rep(0,length(c1)),rep(1,length(c2)))
+    
+    if (min(length(c1), length(c2)) >= 2) {
+        tstat <- mt.teststat(testval, testclab, ...)
+        pvalue <- xcms:::pval(testval, testclab, tstat)
+    } else {
+        message("Too few samples per class, skipping t-test.")
+        tstat <- pvalue <- rep(NA,nrow(testval))
+    }
+    stat <- data.frame(fold = fold, tstat = tstat, pvalue = pvalue)
+    if (length(levels(sampclass(object))) >2) {
+        pvalAnova<-c()
+        for(i in 1:nrow(values)){
+            var<-as.numeric(values[i,])
+            ano<-summary(aov(var ~ sampclass(object)) )
+            pvalAnova<-append(pvalAnova, unlist(ano)["Pr(>F)1"])
+        }
+        stat<-cbind(stat, anova= pvalAnova)
+    }
+    if (metlin) {
+        neutralmass <- groupmat[,"mzmed"] + ifelse(metlin < 0, 1, -1)
+        metlin <- abs(metlin)
+        digits <- ceiling(-log10(metlin))+1
+        metlinurl <-
+        paste("http://metlin.scripps.edu/simple_search_result.php?mass_min=",
+        round(neutralmass - metlin, digits), "&mass_max=",
+        round(neutralmass + metlin, digits), sep="")
+        values <- cbind(metlin = metlinurl, values)
+    }
+    twosamp <- cbind(name = groupnames(object), stat, groupmat, values)
+    if (sortpval) {
+        tsidx <- order(twosamp[,"pvalue"])
+        twosamp <- twosamp[tsidx,]
+        rownames(twosamp) <- 1:nrow(twosamp)
+        values<-values[tsidx,]
+    } else
+    tsidx <- 1:nrow(values)
+    
+    if (length(filebase))
+    write.table(twosamp, paste(filebase, ".tsv", sep = ""), quote = FALSE, sep = "\t", col.names = NA)
+    
+    if (eicmax > 0) {
+        if (length(unique(peaks(object)[,"rt"])) > 1) {
+            ## This looks like "normal" LC data
+            
+            eicmax <- min(eicmax, length(tsidx))
+            eics <- getEIC(object, rtrange = eicwidth*1.1, sampleidx = ceic,
+            groupidx = tsidx[seq(length = eicmax)])
+            
+            if (length(filebase)) {
+                eicdir <- paste(filebase, "_eic", sep="")
+                boxdir <- paste(filebase, "_box", sep="")
+                dir.create(eicdir)
+                dir.create(boxdir)
+                if (capabilities("png")){
+                    xcms:::xcmsBoxPlot(values[seq(length = eicmax),],
+                    sampclass(object), dirpath=boxdir, pic="png",  width=w, height=h)
+                    png(file.path(eicdir, "%003d.png"), width = w, height = h)
+                } else {
+                    xcms:::xcmsBoxPlot(values[seq(length = eicmax),],
+                    sampclass(object), dirpath=boxdir, pic="pdf", width=w, height=h)
+                    pdf(file.path(eicdir, "%003d.pdf"), width = w/72,
+                    height = h/72, onefile = FALSE)
+                }
+            }
+            plot(eics, object, rtrange = eicwidth, mzdec=mzdec)
+            
+            if (length(filebase))
+            dev.off()
+        } else {
+            ## This looks like a direct-infusion single spectrum
+            if (length(filebase)) {
+                eicdir <- paste(filebase, "_eic", sep="")
+                boxdir <- paste(filebase, "_box", sep="")
+                dir.create(eicdir)
+                dir.create(boxdir)
+                if (capabilities("png")){
+                    xcmsBoxPlot(values[seq(length = eicmax),],
+                    sampclass(object), dirpath=boxdir, pic="png",
+                    width=w, height=h)
+                    png(file.path(eicdir, "%003d.png"), width = w, height = h,
+                    units = "px")
+                } else {
+                    xcmsBoxPlot(values[seq(length = eicmax),],
+                    sampclass(object), dirpath=boxdir, pic="pdf",
+                    width=w, height=h)
+                    pdf(file.path(eicdir, "%003d.pdf"), width = w/72,
+                    height = h/72, onefile = FALSE)
+                }
+            }
+            
+            plotSpecWindow(object, gidxs = tsidx[seq(length = eicmax)], borderwidth=1)
+            
+            if (length(filebase))
+            dev.off()
+        }
+    }
+    
+    invisible(twosamp)
+}