Mercurial > repos > iuc > limma_voom
diff limma_voom.R @ 17:86b3df7db58b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 42b1160a549a85c87ed5226a83b55c4e44648597
author | iuc |
---|---|
date | Mon, 18 Feb 2019 17:49:24 -0500 |
parents | 5d903d528193 |
children | 97e06a4c7c75 |
line wrap: on
line diff
--- a/limma_voom.R Sat Feb 09 07:35:04 2019 -0500 +++ b/limma_voom.R Mon Feb 18 17:49:24 2019 -0500 @@ -297,35 +297,43 @@ } else { # Process the single count matrix - counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", strip.white=TRUE, stringsAsFactors=FALSE) + counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", strip.white=TRUE, stringsAsFactors=FALSE, check.names=FALSE) row.names(counts) <- counts[, 1] counts <- counts[ , -1] countsRows <- nrow(counts) # Process factors if (is.null(opt$factInput)) { - factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE) - # order samples as in counts matrix - factorData <- factorData[match(colnames(counts), factorData[, 1]), ] - factors <- factorData[, -1, drop=FALSE] + factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE) + if(!setequal(factorData[, 1], colnames(counts))) + stop("Sample IDs in counts and factors files don't match") + # order samples as in counts matrix + factorData <- factorData[match(colnames(counts), factorData[, 1]), ] + factors <- factorData[, -1, drop=FALSE] } else { - factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE)) - factorData <- list() - for (fact in factors) { - newFact <- unlist(strsplit(fact, split="::")) - factorData <- rbind(factorData, newFact) - } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor. + factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE)) + factorData <- list() + for (fact in factors) { + newFact <- unlist(strsplit(fact, split="::")) + factorData <- rbind(factorData, newFact) + } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor. - # Set the row names to be the name of the factor and delete first row - row.names(factorData) <- factorData[, 1] - factorData <- factorData[, -1] - factorData <- sapply(factorData, sanitiseGroups) - factorData <- sapply(factorData, strsplit, split=",") - factorData <- sapply(factorData, make.names) - # Transform factor data into data frame of R factor objects - factors <- data.frame(factorData) + # Set the row names to be the name of the factor and delete first row + row.names(factorData) <- factorData[, 1] + factorData <- factorData[, -1] + factorData <- sapply(factorData, sanitiseGroups) + factorData <- sapply(factorData, strsplit, split=",") + # Transform factor data into data frame of R factor objects + factors <- data.frame(factorData) } } +# check there are the same number of samples in counts and factors +if(nrow(factors) != ncol(counts)) { + stop("There are a different number of samples in the counts files and factors") +} +# make groups valid R names, required for makeContrasts +factors <- sapply(factors, make.names) +factors <- data.frame(factors) # if annotation file provided if (haveAnno) { @@ -339,6 +347,14 @@ contrastData <- unlist(strsplit(opt$contrastData, split=",")) contrastData <- sanitiseEquation(contrastData) contrastData <- gsub(" ", ".", contrastData, fixed=TRUE) +# in case input groups start with numbers this will make the names valid R names, required for makeContrasts +cons <- NULL +for (i in contrastData) { + i <- strsplit(i, split="-") + i <- lapply(i, make.names) + i <- lapply(i, paste, collapse="-") + cons <- append(cons, unlist(i)) +} plots <- character() if (!is.null(opt$plots)) { @@ -362,8 +378,8 @@ mdvolOutPng <- character() topOut <- character() glimmaOut <- character() -for (i in 1:length(contrastData)) { - con <- contrastData[i] +for (i in 1:length(cons)) { + con <- cons[i] con <- gsub("\\(|\\)", "", con) mdOutPdf[i] <- makeOut(paste0("mdplot_", con, ".pdf")) volOutPdf[i] <- makeOut(paste0("volplot_", con, ".pdf")) @@ -409,6 +425,7 @@ # Creating naming data samplenames <- colnames(data$counts) sampleanno <- data.frame("sampleID"=samplenames, factors) +row.names(factors) <- samplenames # for "Summary of experimental data" table # Creating colours for the groups cols <- as.numeric(factors[, 1]) @@ -451,7 +468,7 @@ if (wantFilt) { print("Outputting filtered counts") - filt_counts <- data.frame(data$genes, data$counts) + filt_counts <- data.frame(data$genes, data$counts, check.names=FALSE) write.table(filt_counts, file=filtOut, row.names=FALSE, sep="\t", quote=FALSE) linkData <- rbind(linkData, data.frame(Label=paste0(deMethod, "_", "filtcounts.tsv"), Link=paste0(deMethod, "_", "filtcounts"), stringsAsFactors=FALSE)) } @@ -520,8 +537,6 @@ y <- new("DGEList", data) print("Generating Design") -# Name rows of factors according to their sample -row.names(factors) <- names(data$counts) factorList <- sapply(names(factors), pasteListName) formula <- "~0" for (i in 1:length(factorList)) { @@ -540,7 +555,7 @@ # Generate contrasts information print("Generating Contrasts") -contrasts <- makeContrasts(contrasts=contrastData, levels=design) +contrasts <- makeContrasts(contrasts=cons, levels=design) ################################################################################ ### Data Output @@ -768,7 +783,7 @@ # Save normalised counts (log2cpm) if (wantNorm) { - norm_counts <- data.frame(vData$genes, vData$E) + norm_counts <- data.frame(vData$genes, vData$E, check.names=FALSE) write.table(norm_counts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE) linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts")))) } @@ -827,8 +842,8 @@ lfc=opt$lfcReq) sumStatus <- summary(status) -for (i in 1:length(contrastData)) { - con <- contrastData[i] +for (i in 1:length(cons)) { + con <- cons[i] con <- gsub("\\(|\\)", "", con) # Collect counts for differential expression upCount[i] <- sumStatus["Up", i] @@ -979,7 +994,7 @@ } } sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount) -row.names(sigDiff) <- contrastData +row.names(sigDiff) <- cons # Save relevant items as rda object if (wantRda) {