diff limma_voom.R @ 17:86b3df7db58b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 42b1160a549a85c87ed5226a83b55c4e44648597
author iuc
date Mon, 18 Feb 2019 17:49:24 -0500
parents 5d903d528193
children 97e06a4c7c75
line wrap: on
line diff
--- a/limma_voom.R	Sat Feb 09 07:35:04 2019 -0500
+++ b/limma_voom.R	Mon Feb 18 17:49:24 2019 -0500
@@ -297,35 +297,43 @@
 
 } else {
     # Process the single count matrix
-    counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", strip.white=TRUE, stringsAsFactors=FALSE)
+    counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", strip.white=TRUE, stringsAsFactors=FALSE, check.names=FALSE)
     row.names(counts) <- counts[, 1]
     counts <- counts[ , -1]
     countsRows <- nrow(counts)
 
     # Process factors
     if (is.null(opt$factInput)) {
-            factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE)
-            # order samples as in counts matrix
-            factorData <- factorData[match(colnames(counts), factorData[, 1]), ]
-            factors <- factorData[, -1, drop=FALSE]
+        factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE)
+        if(!setequal(factorData[, 1], colnames(counts)))
+            stop("Sample IDs in counts and factors files don't match")
+        # order samples as in counts matrix
+        factorData <- factorData[match(colnames(counts), factorData[, 1]), ]
+        factors <- factorData[, -1, drop=FALSE]
     }  else {
-            factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE))
-            factorData <- list()
-            for (fact in factors) {
-                newFact <- unlist(strsplit(fact, split="::"))
-                factorData <- rbind(factorData, newFact)
-            } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
+        factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE))
+        factorData <- list()
+        for (fact in factors) {
+            newFact <- unlist(strsplit(fact, split="::"))
+            factorData <- rbind(factorData, newFact)
+        } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
 
-            # Set the row names to be the name of the factor and delete first row
-            row.names(factorData) <- factorData[, 1]
-            factorData <- factorData[, -1]
-            factorData <- sapply(factorData, sanitiseGroups)
-            factorData <- sapply(factorData, strsplit, split=",")
-            factorData <- sapply(factorData, make.names)
-            # Transform factor data into data frame of R factor objects
-            factors <- data.frame(factorData)
+        # Set the row names to be the name of the factor and delete first row
+        row.names(factorData) <- factorData[, 1]
+        factorData <- factorData[, -1]
+        factorData <- sapply(factorData, sanitiseGroups)
+        factorData <- sapply(factorData, strsplit, split=",")
+        # Transform factor data into data frame of R factor objects
+        factors <- data.frame(factorData)
     }
 }
+# check there are the same number of samples in counts and factors
+if(nrow(factors) != ncol(counts)) {
+    stop("There are a different number of samples in the counts files and factors")
+}
+# make groups valid R names, required for makeContrasts
+factors <- sapply(factors, make.names)
+factors <- data.frame(factors)
 
  # if annotation file provided
 if (haveAnno) {
@@ -339,6 +347,14 @@
 contrastData <- unlist(strsplit(opt$contrastData, split=","))
 contrastData <- sanitiseEquation(contrastData)
 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)
+# in case input groups start with numbers this will make the names valid R names, required for makeContrasts
+cons <- NULL
+for (i in contrastData) {
+    i <- strsplit(i, split="-")
+    i <- lapply(i, make.names)
+    i <- lapply(i, paste, collapse="-")
+    cons <- append(cons, unlist(i))
+}
 
 plots <- character()
 if (!is.null(opt$plots)) {
@@ -362,8 +378,8 @@
 mdvolOutPng <- character()
 topOut <- character()
 glimmaOut <- character()
-for (i in 1:length(contrastData)) {
-    con <- contrastData[i]
+for (i in 1:length(cons)) {
+    con <- cons[i]
     con <- gsub("\\(|\\)", "", con)
     mdOutPdf[i] <- makeOut(paste0("mdplot_", con, ".pdf"))
     volOutPdf[i] <- makeOut(paste0("volplot_", con, ".pdf"))
@@ -409,6 +425,7 @@
 # Creating naming data
 samplenames <- colnames(data$counts)
 sampleanno <- data.frame("sampleID"=samplenames, factors)
+row.names(factors) <- samplenames # for "Summary of experimental data" table
 
 # Creating colours for the groups
 cols <- as.numeric(factors[, 1])
@@ -451,7 +468,7 @@
 
     if (wantFilt) {
         print("Outputting filtered counts")
-        filt_counts <- data.frame(data$genes, data$counts)
+        filt_counts <- data.frame(data$genes, data$counts, check.names=FALSE)
         write.table(filt_counts, file=filtOut, row.names=FALSE, sep="\t", quote=FALSE)
         linkData <- rbind(linkData, data.frame(Label=paste0(deMethod, "_", "filtcounts.tsv"), Link=paste0(deMethod, "_", "filtcounts"), stringsAsFactors=FALSE))
     }
@@ -520,8 +537,6 @@
 y <- new("DGEList", data)
 
 print("Generating Design")
-# Name rows of factors according to their sample
-row.names(factors) <- names(data$counts)
 factorList <- sapply(names(factors), pasteListName)
 formula <- "~0"
 for (i in 1:length(factorList)) {
@@ -540,7 +555,7 @@
 
 # Generate contrasts information
 print("Generating Contrasts")
-contrasts <- makeContrasts(contrasts=contrastData, levels=design)
+contrasts <- makeContrasts(contrasts=cons, levels=design)
 
 ################################################################################
 ### Data Output
@@ -768,7 +783,7 @@
 
      # Save normalised counts (log2cpm)
     if (wantNorm) {
-        norm_counts <- data.frame(vData$genes, vData$E)
+        norm_counts <- data.frame(vData$genes, vData$E, check.names=FALSE)
         write.table(norm_counts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE)
         linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts"))))
     }
@@ -827,8 +842,8 @@
                        lfc=opt$lfcReq)
 sumStatus <- summary(status)
 
-for (i in 1:length(contrastData)) {
-    con <- contrastData[i]
+for (i in 1:length(cons)) {
+    con <- cons[i]
     con <- gsub("\\(|\\)", "", con)
     # Collect counts for differential expression
     upCount[i] <- sumStatus["Up", i]
@@ -979,7 +994,7 @@
     }
 }
 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
-row.names(sigDiff) <- contrastData
+row.names(sigDiff) <- cons
 
 # Save relevant items as rda object
 if (wantRda) {