diff limma_voom.R @ 6:39fa12a6d885 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 60cae222b10f43f830936c19298bd723ac47e43d
author iuc
date Tue, 08 May 2018 18:12:40 -0400
parents d8a55b5f0de0
children e6a4ff41af6b
line wrap: on
line diff
--- a/limma_voom.R	Sat May 05 17:55:13 2018 -0400
+++ b/limma_voom.R	Tue May 08 18:12:40 2018 -0400
@@ -22,6 +22,8 @@
 #       robOpt", "b", 0, "logical"          -String specifying if robust options should be used
 #       trend", "t", 1, "double"            -Float for prior.count if limma-trend is used instead of voom
 #       weightOpt", "w", 0, "logical"       -String specifying if voomWithQualityWeights should be used
+#       volhiOpt", "v", 1, "integer"        -Integer specifying number of genes to highlight in volcano plot
+#       treatOpt", "T", 0, "logical"        -String specifying if TREAT function shuld be used
 #
 # OUT:
 #       MDS Plot
@@ -49,12 +51,8 @@
 library(scales, quietly=TRUE, warn.conflicts=FALSE)
 library(getopt, quietly=TRUE, warn.conflicts=FALSE)
 
-if (packageVersion("limma") < "3.20.1") {
-    stop("Please update 'limma' to version >= 3.20.1 to run this tool")
-}
-
 ################################################################################
-### Function Delcaration
+### Function Declaration
 ################################################################################
 # Function to sanitise contrast equations so there are no whitespaces
 # surrounding the arithmetic operators, leading or trailing whitespace
@@ -170,7 +168,9 @@
     "normOpt", "n", 1, "character",
     "robOpt", "b", 0, "logical",
     "trend", "t", 1, "double",
-    "weightOpt", "w", 0, "logical"),
+    "weightOpt", "w", 0, "logical",
+    "volhiOpt", "v", 1, "integer",
+    "treatOpt", "T", 0, "logical"),
     byrow=TRUE, ncol=4)
 opt <- getopt(spec)
 
@@ -237,6 +237,12 @@
     priorCount <- opt$trend
 }
 
+if (is.null(opt$treatOpt)) {
+    wantTreat <- FALSE
+} else {
+    wantTreat <- TRUE
+}
+
 
 if (!is.null(opt$filesPath)) {
     # Process the separate count files (adapted from DESeq2 wrapper)
@@ -311,19 +317,28 @@
 contrastData <- sanitiseEquation(contrastData)
 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)
 
+denOutPng <- makeOut("densityplots.png")
+denOutPdf <- makeOut("DensityPlots.pdf")
+boxOutPng <- makeOut("boxplots.png")
+boxOutPdf <- makeOut("BoxPlots.pdf")
+mdsOutPdf <- character() # Initialise character vector
+mdsOutPng <- character()
+for (i in 1:ncol(factors)) {
+    mdsOutPdf[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".pdf"))
+    mdsOutPng[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".png"))
+}
 
-mdsOutPdf <- makeOut("mdsplot_nonorm.pdf")
-mdsOutPng <- makeOut("mdsplot_nonorm.png")
-nmdsOutPdf <- makeOut("mdsplot.pdf")
-nmdsOutPng <- makeOut("mdsplot.png")
-mdOutPdf <- character()   # Initialise character vector
-mdOutPng <- character()
+mdOutPdf <- character()
+volOutPdf <- character()
+mdvolOutPng <- character()
 topOut <- character()
 for (i in 1:length(contrastData)) {
     mdOutPdf[i] <- makeOut(paste0("mdplot_", contrastData[i], ".pdf"))
-    mdOutPng[i] <- makeOut(paste0("mdplot_", contrastData[i], ".png"))
+    volOutPdf[i] <- makeOut(paste0("volplot_", contrastData[i], ".pdf"))
+    mdvolOutPng[i] <- makeOut(paste0("mdvolplot_", contrastData[i], ".png"))
     topOut[i] <- makeOut(paste0(deMethod, "_", contrastData[i], ".tsv"))
 }
+
 normOut <- makeOut(paste0(deMethod, "_normcounts.tsv"))
 rdaOut <- makeOut(paste0(deMethod, "_analysis.RData"))
 sessionOut <- makeOut("session_info.txt")
@@ -382,13 +397,13 @@
 sampleanno <- data.frame("sampleID"=samplenames, factors)
 
 
-# Generating the DGEList object "data"
+# Generating the DGEList object "y"
 print("Generating DGEList object")
 data$samples <- sampleanno
 data$samples$lib.size <- colSums(data$counts)
 data$samples$norm.factors <- 1
 row.names(data$samples) <- colnames(data$counts)
-data <- new("DGEList", data)
+y <- new("DGEList", data)
 
 print("Generating Design")
 # Name rows of factors according to their sample
@@ -406,7 +421,7 @@
 
 # Calculating normalising factors
 print("Calculating Normalisation Factors")
-data <- calcNormFactors(data, method=opt$normOpt)
+y <- calcNormFactors(y, method=opt$normOpt)
 
 # Generate contrasts information
 print("Generating Contrasts")
@@ -415,25 +430,120 @@
 ################################################################################
 ### Data Output
 ################################################################################
+
+# Plot Density (if filtering low counts)
+if (filtCPM || filtSmpCount || filtTotCount) {
+    nsamples <- ncol(counts)
+
+    # PNG
+    png(denOutPng, width=1200, height=600)
+    par(mfrow=c(1,2), cex.axis=0.8)
+
+    # before filtering
+    logcpm <- cpm(counts, log=TRUE)
+    plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
+    title(main="Density Plot: Raw counts", xlab="Log-cpm")
+    for (i in 2:nsamples){
+        den <- density(logcpm[,i])
+        lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
+    }
+
+    # after filtering
+    logcpm <- cpm(data$counts, log=TRUE)
+    plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
+    title(main="Density Plot: Filtered counts", xlab="Log-cpm")
+    for (i in 2:nsamples){
+        den <- density(logcpm[,i])
+        lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
+    }
+    legend("topright", samplenames, text.col=as.numeric(factors[, 1]), bty="n")
+    imgName <- "Densityplots.png"
+    imgAddr <- "densityplots.png"
+    imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
+    invisible(dev.off())
+
+    # PDF
+    pdf(denOutPdf, width=14)
+    par(mfrow=c(1,2), cex.axis=0.8)
+    logcpm <- cpm(counts, log=TRUE)
+    plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
+    title(main="Density Plot: Raw counts", xlab="Log-cpm")
+    for (i in 2:nsamples){
+        den <- density(logcpm[,i])
+        lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
+    }
+    logcpm <- cpm(data$counts, log=TRUE)
+    plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
+    title(main="Density Plot: Filtered counts", xlab="Log-cpm")
+    for (i in 2:nsamples){
+        den <- density(logcpm[,i])
+        lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
+    }
+    legend("topright", samplenames, text.col=as.numeric(factors[, 1]), bty="n")
+    linkName <- "DensityPlots.pdf"
+    linkAddr <- "densityplots.pdf"
+    linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
+    invisible(dev.off())
+}
+
+# Plot Box plots (before and after normalisation)
+if (opt$normOpt != "none") {
+    png(boxOutPng, width=1200, height=600)
+    par(mfrow=c(1,2), cex.axis=0.8)
+    logcpm <- cpm(y$counts, log=TRUE)
+    boxplot(logcpm, las=2, col=as.numeric(factors[, 1]))
+    abline(h=median(logcpm), col=4)
+    title(main="Box Plot: Unnormalised counts", ylab="Log-cpm")
+    lcpm <- cpm(y, log=TRUE)
+    boxplot(lcpm, las=2, col=as.numeric(factors[, 1]))
+    abline(h=median(lcpm), col=4)
+    title(main="Box Plot: Normalised counts", ylab="Log-cpm")
+    imgName <- "Boxplots.png"
+    imgAddr <- "boxplots.png"
+    imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
+    invisible(dev.off())
+
+    pdf(boxOutPdf, width=14)
+    par(mfrow=c(1,2), cex.axis=0.8)
+    logcpm <- cpm(y$counts, log=TRUE)
+    boxplot(logcpm, las=2, col=as.numeric(factors[, 1]))
+    abline(h=median(logcpm), col=4)
+    title(main="Box Plot: Unnormalised counts", ylab="Log-cpm")
+    lcpm <- cpm(y, log=TRUE)
+    boxplot(lcpm, las=2, col=as.numeric(factors[, 1]))
+    abline(h=median(lcpm), col=4)
+    title(main="Box Plot: Normalised counts", ylab="Log-cpm")
+    linkName <- "BoxPlots.pdf"
+    linkAddr <- "boxplots.pdf"
+    linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
+    invisible(dev.off())
+}
+
 # Plot MDS
 print("Generating MDS plot")
 labels <- names(counts)
-png(mdsOutPng, width=600, height=600)
-# Currently only using a single factor
-plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot (unnormalised)")
-imageData[1, ] <- c("MDS Plot (unnormalised)", "mdsplot_nonorm.png")
-invisible(dev.off())
 
-pdf(mdsOutPdf)
-plotMDS(data, labels=labels, cex=0.5)
-linkData[1, ] <- c("MDS Plot (unnormalised).pdf", "mdsplot_nonorm.pdf")
-invisible(dev.off())
+for (i in 1:ncol(factors)) {
+    png(mdsOutPng[i], width=600, height=600)
+    plotMDS(y, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
+    imgName <- paste0("MDSPlot_", names(factors)[i], ".png")
+    imgAddr <- paste0("mdsplot_", names(factors)[i], ".png")
+    imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
+    invisible(dev.off())
+
+    pdf(mdsOutPdf[i])
+    plotMDS(y, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
+    linkName <- paste0("MDSPlot_", names(factors)[i], ".pdf")
+    linkAddr <- paste0("mdsplot_", names(factors)[i], ".pdf")
+    linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
+    invisible(dev.off())
+}
 
 if (wantTrend) {
     # limma-trend approach
-    logCPM <- cpm(data, log=TRUE, prior.count=opt$trend)
+    logCPM <- cpm(y, log=TRUE, prior.count=opt$trend)
     fit <- lmFit(logCPM, design)
-    fit$genes <- data$genes
+    fit$genes <- y$genes
     fit <- contrasts.fit(fit, contrasts)
     if (wantRobust) {
         fit <- eBayes(fit, trend=TRUE, robust=TRUE)
@@ -446,15 +556,15 @@
 
     png(saOutPng, width=600, height=600)
     plotSA(fit, main="SA Plot")
-    imgName <- "SA Plot.png"
+    imgName <- "SAPlot.png"
     imgAddr <- "saplot.png"
     imageData <- rbind(imageData, c(imgName, imgAddr))
     invisible(dev.off())
 
     pdf(saOutPdf, width=14)
     plotSA(fit, main="SA Plot")
-    linkName <- paste0("SA Plot.pdf")
-    linkAddr <- paste0("saplot.pdf")
+    linkName <- "SAPlot.pdf"
+    linkAddr <- "saplot.pdf"
     linkData <- rbind(linkData, c(linkName, linkAddr))
     invisible(dev.off())
 
@@ -473,16 +583,16 @@
     if (wantWeight) {
         # Creating voom data object and plot
         png(voomOutPng, width=1000, height=600)
-        vData <- voomWithQualityWeights(data, design=design, plot=TRUE)
-        imgName <- "Voom Plot.png"
+        vData <- voomWithQualityWeights(y, design=design, plot=TRUE)
+        imgName <- "VoomPlot.png"
         imgAddr <- "voomplot.png"
         imageData <- rbind(imageData, c(imgName, imgAddr))
         invisible(dev.off())
 
         pdf(voomOutPdf, width=14)
-        vData <- voomWithQualityWeights(data, design=design, plot=TRUE)
-        linkName <- paste0("Voom Plot.pdf")
-        linkAddr <- paste0("voomplot.pdf")
+        vData <- voomWithQualityWeights(y, design=design, plot=TRUE)
+        linkName <- "VoomPlot.pdf"
+        linkAddr <- "voomplot.pdf"
         linkData <- rbind(linkData, c(linkName, linkAddr))
         invisible(dev.off())
 
@@ -493,16 +603,16 @@
     } else {
         # Creating voom data object and plot
         png(voomOutPng, width=600, height=600)
-        vData <- voom(data, design=design, plot=TRUE)
-        imgName <- "Voom Plot"
+        vData <- voom(y, design=design, plot=TRUE)
+        imgName <- "VoomPlot"
         imgAddr <- "voomplot.png"
         imageData <- rbind(imageData, c(imgName, imgAddr))
         invisible(dev.off())
 
         pdf(voomOutPdf)
-        vData <- voom(data, design=design, plot=TRUE)
-        linkName <- paste0("Voom Plot.pdf")
-        linkAddr <- paste0("voomplot.pdf")
+        vData <- voom(y, design=design, plot=TRUE)
+        linkName <- "VoomPlot.pdf"
+        linkAddr <- "voomplot.pdf"
         linkData <- rbind(linkData, c(linkName, linkAddr))
         invisible(dev.off())
 
@@ -527,24 +637,18 @@
     plotData <- vData
 }
 
-print("Generating normalised MDS plot")
-png(nmdsOutPng, width=600, height=600)
-# Currently only using a single factor
-plotMDS(plotData, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot (normalised)")
-imgName <- "MDS Plot (normalised)"
-imgAddr <- "mdsplot.png"
-imageData <- rbind(imageData, c(imgName, imgAddr))
-invisible(dev.off())
-
-pdf(nmdsOutPdf)
-plotMDS(plotData, labels=labels, cex=0.5)
-linkName <- paste0("MDS Plot (normalised).pdf")
-linkAddr <- paste0("mdsplot.pdf")
-linkData <- rbind(linkData, c(linkName, linkAddr))
-invisible(dev.off())
-
 
 print("Generating DE results")
+
+if (wantTreat) {
+    print("Applying TREAT method")
+    if (wantRobust) {
+        fit <- treat(fit, lfc=opt$lfcReq, robust=TRUE)
+    } else {
+        fit <- treat(fit, lfc=opt$lfcReq, robust=FALSE)
+    }
+}
+
 status = decideTests(fit, adjust.method=opt$pAdjOpt, p.value=opt$pValReq,
                        lfc=opt$lfcReq)
 sumStatus <- summary(status)
@@ -556,7 +660,11 @@
     flatCount[i] <- sumStatus["NotSig", i]
 
     # Write top expressions table
-    top <- topTable(fit, coef=i, number=Inf, sort.by="P")
+    if (wantTreat) {
+        top <- topTreat(fit, coef=i, number=Inf, sort.by="P")
+    } else{
+        top <- topTable(fit, coef=i, number=Inf, sort.by="P")
+    }
     write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE)
 
     linkName <- paste0(deMethod, "_", contrastData[i], ".tsv")
@@ -572,22 +680,55 @@
 
     abline(h=0, col="grey", lty=2)
 
-    linkName <- paste0("MD Plot_", contrastData[i], " (.pdf)")
+    linkName <- paste0("MDPlot_", contrastData[i], ".pdf")
     linkAddr <- paste0("mdplot_", contrastData[i], ".pdf")
     linkData <- rbind(linkData, c(linkName, linkAddr))
     invisible(dev.off())
 
-    png(mdOutPng[i], height=600, width=600)
-    limma::plotMD(fit, status=status[, i], coef=i,
-                  main=paste("MD Plot:", unmake.names(contrastData[i])),
-                  hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
-                  xlab="Average Expression", ylab="logFC")
+    # Plot Volcano
+    pdf(volOutPdf[i])
+    if (haveAnno) {
+        # labels must be in second column currently
+        limma::volcanoplot(fit, coef=i,
+            main=paste("Volcano Plot:", unmake.names(contrastData[i])),
+            highlight=opt$volhiOpt,
+            names=fit$genes[, 2])
+    } else {
+        limma::volcanoplot(fit, coef=i,
+            main=paste("Volcano Plot:", unmake.names(contrastData[i])),,
+            highlight=opt$volhiOpt,
+            names=fit$genes$GeneID)
+    }
+    linkName <- paste0("VolcanoPlot_", contrastData[i], ".pdf")
+    linkAddr <- paste0("volplot_", contrastData[i], ".pdf")
+    linkData <- rbind(linkData, c(linkName, linkAddr))
+    invisible(dev.off())
+
+    # PNG of MD and Volcano
+    png(mdvolOutPng[i], width=1200, height=600)
+    par(mfrow=c(1, 2), mar=c(5,4,2,2)+0.1, oma=c(0,0,3,0))
+    limma::plotMD(fit, status=status[, i], coef=i, main="MD Plot",
+                   hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
+                   xlab="Average Expression", ylab="logFC")
 
     abline(h=0, col="grey", lty=2)
 
-    imgName <- paste0("MD Plot_", contrastData[i])
-    imgAddr <- paste0("mdplot_", contrastData[i], ".png")
+    # Volcano plots
+    if (haveAnno) {
+        # labels must be in second column currently
+        limma::volcanoplot(fit, coef=i, main="Volcano Plot",
+            highlight=opt$volhiOpt,
+            names=fit$genes[, 2])
+    } else {
+        limma::volcanoplot(fit, coef=i, main="Volcano Plot",
+            highlight=opt$volhiOpt,
+            names=fit$genes$GeneID)
+    }
+
+    imgName <- paste0("MDVolPlot_", contrastData[i])
+    imgAddr <- paste0("mdvolplot_", contrastData[i], ".png")
     imageData <- rbind(imageData, c(imgName, imgAddr))
+    title(paste0("Contrast: ", unmake.names(contrastData[i])), outer=TRUE, cex.main=1.5)
     invisible(dev.off())
 }
 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
@@ -597,11 +738,10 @@
 if (wantRda) {
     print("Saving RData")
     if (wantWeight) {
-      save(data, status, plotData, labels, factors, wts, fit, top, contrasts,
-           design,
+      save(counts, data, y, status, plotData, labels, factors, wts, fit, top, contrasts, design,
            file=rdaOut, ascii=TRUE)
     } else {
-      save(data, status, plotData, labels, factors, fit, top, contrasts, design,
+      save(counts, data, y, status, plotData, labels, factors, fit, top, contrasts, design,
            file=rdaOut, ascii=TRUE)
     }
     linkData <- rbind(linkData, c((paste0(deMethod, "_analysis.RData")), (paste0(deMethod, "_analysis.RData"))))
@@ -626,15 +766,16 @@
 
 cata("<body>\n")
 cata("<h3>Limma Analysis Output:</h3>\n")
-cata("Links to PDF copies of plots are in 'Plots' section below />\n")
-if (wantWeight) {
-    HtmlImage(imageData$Link[1], imageData$Label[1], width=1000)
-} else {
-    HtmlImage(imageData$Link[1], imageData$Label[1])
-}
+cata("Links to PDF copies of plots are in 'Plots' section below <br />\n")
 
-for (i in 2:nrow(imageData)) {
-    HtmlImage(imageData$Link[i], imageData$Label[i])
+for (i in 1:nrow(imageData)) {
+    if (grepl("density|box|mdvol", imageData$Link[i])) {
+        HtmlImage(imageData$Link[i], imageData$Label[i], width=1200)
+    } else if (wantWeight) {
+        HtmlImage(imageData$Link[i], imageData$Label[i], width=1000)
+    } else {
+        HtmlImage(imageData$Link[i], imageData$Label[i])
+    }
 }
 
 cata("<h4>Differential Expression Counts:</h4>\n")
@@ -719,8 +860,15 @@
 } else {
     ListItem("Weights were not applied to samples.")
 }
+if (wantTreat) {
+    ListItem(paste("Testing significance relative to a fold-change threshold (TREAT) was performed using a threshold of log2 =", opt$lfcReq, "at FDR of", opt$pValReq, "."))
+}
 if (wantRobust) {
-    ListItem("eBayes was used with robust settings (robust=TRUE).")
+    if (wantTreat) {
+        ListItem("TREAT was used with robust settings (robust=TRUE).")
+    } else {
+        ListItem("eBayes was used with robust settings (robust=TRUE).")
+    }
 }
 if (opt$pAdjOpt!="none") {
     if (opt$pAdjOpt=="BH" || opt$pAdjOpt=="BY") {