limma_voom: limma_voom.R comparison

comparison limma_voom.R @ 17:86b3df7db58b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 42b1160a549a85c87ed5226a83b55c4e44648597

author	iuc
date	Mon, 18 Feb 2019 17:49:24 -0500
parents	5d903d528193
children	97e06a4c7c75

comparison

equal deleted inserted replaced

-:5d903d528193
+:86b3df7db58b
 countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)})
 counts <- do.call("cbind", countfiles)
 } else {
 # Process the single count matrix
-counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", strip.white=TRUE, stringsAsFactors=FALSE)
+counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", strip.white=TRUE, stringsAsFactors=FALSE, check.names=FALSE)
 row.names(counts) <- counts[, 1]
 counts <- counts[ , -1]
 countsRows <- nrow(counts)
 # Process factors
 if (is.null(opt$factInput)) {
 factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE)
-# order samples as in counts matrix
+if(!setequal(factorData[, 1], colnames(counts)))
-factorData <- factorData[match(colnames(counts), factorData[, 1]), ]
+stop("Sample IDs in counts and factors files don't match")
-factors <- factorData[, -1, drop=FALSE]
+# order samples as in counts matrix
+factorData <- factorData[match(colnames(counts), factorData[, 1]), ]
+factors <- factorData[, -1, drop=FALSE]
 }  else {
 factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE))
 factorData <- list()
 for (fact in factors) {
 newFact <- unlist(strsplit(fact, split="::"))
 factorData <- rbind(factorData, newFact)
 } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
 # Set the row names to be the name of the factor and delete first row
 row.names(factorData) <- factorData[, 1]
 factorData <- factorData[, -1]
 factorData <- sapply(factorData, sanitiseGroups)
 factorData <- sapply(factorData, strsplit, split=",")
-factorData <- sapply(factorData, make.names)
+# Transform factor data into data frame of R factor objects
-# Transform factor data into data frame of R factor objects
+factors <- data.frame(factorData)
-factors <- data.frame(factorData)
+}
 }
-}
+# check there are the same number of samples in counts and factors
+if(nrow(factors) != ncol(counts)) {
+stop("There are a different number of samples in the counts files and factors")
+}
+# make groups valid R names, required for makeContrasts
+factors <- sapply(factors, make.names)
+factors <- data.frame(factors)
 # if annotation file provided
 if (haveAnno) {
 geneanno <- read.table(opt$annoPath, header=TRUE, sep="\t", quote= "", strip.white=TRUE, stringsAsFactors=FALSE)
 }
 # Split up contrasts seperated by comma into a vector then sanitise
 contrastData <- unlist(strsplit(opt$contrastData, split=","))
 contrastData <- sanitiseEquation(contrastData)
 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)
+# in case input groups start with numbers this will make the names valid R names, required for makeContrasts
+cons <- NULL
+for (i in contrastData) {
+i <- strsplit(i, split="-")
+i <- lapply(i, make.names)
+i <- lapply(i, paste, collapse="-")
+cons <- append(cons, unlist(i))
+}
 plots <- character()
 if (!is.null(opt$plots)) {
 plots <- unlist(strsplit(opt$plots, split=","))
 }
 heatOutPdf <- character()
 stripOutPdf <- character()
 mdvolOutPng <- character()
 topOut <- character()
 glimmaOut <- character()
-for (i in 1:length(contrastData)) {
+for (i in 1:length(cons)) {
-con <- contrastData[i]
+con <- cons[i]
 con <- gsub("\\(|\\)", "", con)
 mdOutPdf[i] <- makeOut(paste0("mdplot_", con, ".pdf"))
 volOutPdf[i] <- makeOut(paste0("volplot_", con, ".pdf"))
 heatOutPdf[i] <- makeOut(paste0("heatmap_", con, ".pdf"))
 stripOutPdf[i] <- makeOut(paste0("stripcharts_", con, ".pdf"))
 }
 # Creating naming data
 samplenames <- colnames(data$counts)
 sampleanno <- data.frame("sampleID"=samplenames, factors)
+row.names(factors) <- samplenames # for "Summary of experimental data" table
 # Creating colours for the groups
 cols <- as.numeric(factors[, 1])
 col.group <- palette()[cols]
 data$counts <- data$counts[keep, ]
 data$genes <- data$genes[keep, , drop=FALSE]
 if (wantFilt) {
 print("Outputting filtered counts")
-filt_counts <- data.frame(data$genes, data$counts)
+filt_counts <- data.frame(data$genes, data$counts, check.names=FALSE)
 write.table(filt_counts, file=filtOut, row.names=FALSE, sep="\t", quote=FALSE)
 linkData <- rbind(linkData, data.frame(Label=paste0(deMethod, "_", "filtcounts.tsv"), Link=paste0(deMethod, "_", "filtcounts"), stringsAsFactors=FALSE))
 }
 # Plot Density
 data$samples$norm.factors <- 1
 row.names(data$samples) <- colnames(data$counts)
 y <- new("DGEList", data)
 print("Generating Design")
-# Name rows of factors according to their sample
-row.names(factors) <- names(data$counts)
 factorList <- sapply(names(factors), pasteListName)
 formula <- "~0"
 for (i in 1:length(factorList)) {
 formula <- paste(formula,factorList[i], sep="+")
 }
 logcounts <- y #store for plots
 y <- calcNormFactors(y, method=opt$normOpt)
 # Generate contrasts information
 print("Generating Contrasts")
-contrasts <- makeContrasts(contrasts=contrastData, levels=design)
+contrasts <- makeContrasts(contrasts=cons, levels=design)
 ################################################################################
 ### Data Output
 ################################################################################
 voomFit <- lmFit(vData, design)
 }
 # Save normalised counts (log2cpm)
 if (wantNorm) {
-norm_counts <- data.frame(vData$genes, vData$E)
+norm_counts <- data.frame(vData$genes, vData$E, check.names=FALSE)
 write.table(norm_counts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE)
 linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts"))))
 }
 # Fit linear model and estimate dispersion with eBayes
 status = decideTests(fit, adjust.method=opt$pAdjOpt, p.value=opt$pValReq,
 lfc=opt$lfcReq)
 sumStatus <- summary(status)
-for (i in 1:length(contrastData)) {
+for (i in 1:length(cons)) {
-con <- contrastData[i]
+con <- cons[i]
 con <- gsub("\\(|\\)", "", con)
 # Collect counts for differential expression
 upCount[i] <- sumStatus["Up", i]
 downCount[i] <- sumStatus["Down", i]
 flatCount[i] <- sumStatus["NotSig", i]
 linkData <- rbind(linkData, c(linkName, linkAddr))
 invisible(dev.off())
 }
 }
 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
-row.names(sigDiff) <- contrastData
+row.names(sigDiff) <- cons
 # Save relevant items as rda object
 if (wantRda) {
 print("Saving RData")
 if (wantWeight) {

Mercurial > repos > iuc > limma_voom

comparison limma_voom.R @ 17:86b3df7db58b draft