Mercurial > repos > iuc > limma_voom
diff limma_voom.R @ 1:76d01fe0ec36 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 58c05c0ce9334f8b9c800283cfd1f40573546edd
author | iuc |
---|---|
date | Wed, 05 Jul 2017 04:39:42 -0400 |
parents | bdebdea5f6a7 |
children | a330ddf43861 |
line wrap: on
line diff
--- a/limma_voom.R Mon Jun 12 07:41:02 2017 -0400 +++ b/limma_voom.R Wed Jul 05 04:39:42 2017 -0400 @@ -3,7 +3,7 @@ # expression analysis # # ARGS: 1.countPath -Path to RData input containing counts -# 2.annoPath -Path to RData input containing gene annotations +# 2.annoPath -Path to input containing gene annotations # 3.htmlPath -Path to html file linking to other outputs # 4.outPath -Path to folder to write all output to # 5.rdaOpt -String specifying if RData should be saved @@ -15,15 +15,18 @@ # 11.pAdjOpt -String specifying the p-value adjustment method # 12.pValReq -Float specifying the p-value requirement # 13.lfcReq -Float specifying the log-fold-change requirement -# 14.factorData -String containing factor names and values +# 14.normCounts -String specifying if normalised counts should be output +# 15.factPath -Path to factor information file +# 16.factorData -Strings containing factor names and values if manually input # # OUT: Voom Plot # BCV Plot # MA Plot -# Top Expression Table +# Expression Table # HTML file linking to the ouputs # # Author: Shian Su - registertonysu@gmail.com - Jan 2014 +# Modified by: Maria Doyle - Jun 2017 # Record starting time timeStart <- as.character(Sys.time()) @@ -148,13 +151,31 @@ pAdjOpt <- as.character(argv[11]) pValReq <- as.numeric(argv[12]) lfcReq <- as.numeric(argv[13]) -factorData <- list() -for (i in 14:length(argv)) { - newFact <- unlist(strsplit(as.character(argv[i]), split="::")) - factorData <- rbind(factorData, newFact) -} # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... +normCounts <- as.character(argv[14]) +factPath <- as.character(argv[15]) +# Process factors +if (as.character(argv[16])=="None") { + factorData <- read.table(factPath, header=TRUE, sep="\t") + factors <- factorData[,-1] +} else { + factorData <- list() + for (i in 16:length(argv)) { + newFact <- unlist(strsplit(as.character(argv[i]), split="::")) + factorData <- rbind(factorData, newFact) + } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor. -# Process arguments + # Set the row names to be the name of the factor and delete first row + row.names(factorData) <- factorData[, 1] + factorData <- factorData[, -1] + factorData <- sapply(factorData, sanitiseGroups) + factorData <- sapply(factorData, strsplit, split=",") + factorData <- sapply(factorData, make.names) + # Transform factor data into data frame of R factor objects + factors <- data.frame(factorData) + +} + +# Process other arguments if (weightOpt=="yes") { wantWeight <- TRUE } else { @@ -173,15 +194,12 @@ haveAnno <- TRUE } -# Set the row names to be the name of the factor and delete first row -row.names(factorData) <- factorData[, 1] -factorData <- factorData[, -1] -factorData <- sapply(factorData, sanitiseGroups) -factorData <- sapply(factorData, strsplit, split=",") -factorData <- sapply(factorData, make.names) +if (normCounts=="yes") { + wantNorm <- TRUE +} else { + wantNorm <- FALSE +} -# Transform factor data into data frame of R factor objects -factors <- data.frame(factorData) #Create output directory dir.create(outPath, showWarnings=FALSE) @@ -205,6 +223,7 @@ maOutPng[i] <- makeOut(paste0("maplot_", contrastData[i], ".png")) topOut[i] <- makeOut(paste0("limma-voom_", contrastData[i], ".tsv")) } # Save output paths for each contrast as vectors +normOut <- makeOut("limma-voom_normcounts.tsv") rdaOut <- makeOut("RData.rda") sessionOut <- makeOut("session_info.txt") @@ -221,12 +240,12 @@ flatCount <- numeric() # Read in counts and geneanno data -counts <- read.table(countPath, header=TRUE, sep="\t") +counts <- read.table(countPath, header=TRUE, sep="\t", stringsAsFactors=FALSE) row.names(counts) <- counts[, 1] counts <- counts[ , -1] countsRows <- nrow(counts) if (haveAnno) { - geneanno <- read.table(annoPath, header=TRUE, sep="\t") + geneanno <- read.table(annoPath, header=TRUE, sep="\t", stringsAsFactors=FALSE) } ################################################################################ @@ -247,7 +266,7 @@ preFilterCount <- nrow(data$counts) sel <- rowSums(cpm(data$counts) > cpmReq) >= sampleReq data$counts <- data$counts[sel, ] -data$genes <- data$genes[sel, ] +data$genes <- data$genes[sel, ,drop = FALSE] postFilterCount <- nrow(data$counts) filteredCount <- preFilterCount-postFilterCount @@ -255,6 +274,7 @@ samplenames <- colnames(data$counts) sampleanno <- data.frame("sampleID"=samplenames, factors) + # Generating the DGEList object "data" data$samples <- sampleanno data$samples$lib.size <- colSums(data$counts) @@ -263,14 +283,17 @@ data <- new("DGEList", data) factorList <- sapply(names(factors), pasteListName) -formula <- "~0" + +formula <- "~0" for (i in 1:length(factorList)) { - formula <- paste(formula, factorList[i], sep="+") + formula <- paste(formula,factorList[i], sep="+") } + formula <- formula(formula) design <- model.matrix(formula) + for (i in 1:length(factorList)) { - colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE) + colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE) } # Calculating normalising factor, estimating dispersion @@ -330,6 +353,13 @@ } + # Save normalised counts (log2cpm) +if (wantNorm) { + norm_counts <- data.frame(vData$genes, vData$E) + write.table (norm_counts, file=normOut, row.names=FALSE, sep="\t") + linkData <- rbind(linkData, c("limma-voom_normcounts.tsv", "limma-voom_normcounts.tsv")) +} + # Fit linear model and estimate dispersion with eBayes voomFit <- contrasts.fit(voomFit, contrasts) voomFit <- eBayes(voomFit) @@ -368,12 +398,11 @@ top <- topTable(voomFit, coef=i, number=Inf, sort.by="P") write.table(top, file=topOut[i], row.names=FALSE, sep="\t") - linkName <- paste0("limma-voom_", contrastData[i], - ".tsv") + linkName <- paste0("limma-voom_", contrastData[i], ".tsv") linkAddr <- paste0("limma-voom_", contrastData[i], ".tsv") linkData <- rbind(linkData, c(linkName, linkAddr)) - # Plot MA (log ratios vs mean average) using limma package on weighted data + # Plot MA (log ratios vs mean average) using limma package on weighted pdf(maOutPdf[i]) limma::plotMA(voomFit, status=status, coef=i, main=paste("MA Plot:", unmake.names(contrastData[i])), @@ -534,12 +563,14 @@ cata("<h4>Summary of experimental data:</h4>\n") -cata("<p>*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP*</p>\n") +cata("<p>*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP(S)*</p>\n") cata("<table border=\"1\" cellpadding=\"3\">\n") cata("<tr>\n") -TableItem() -for (i in names(factors)) { +TableHeadItem("SampleID") +TableHeadItem(names(factors)[1]," (Primary Factor)") + +for (i in names(factors)[2:length(names(factors))]) { TableHeadItem(i) } cata("</tr>\n") @@ -547,7 +578,7 @@ for (i in 1:nrow(factors)) { cata("<tr>\n") TableHeadItem(row.names(factors)[i]) - for (j in ncol(factors)) { + for (j in 1:ncol(factors)) { TableItem(as.character(unmake.names(factors[i, j]))) } cata("</tr>\n")