Mercurial > repos > iuc > deseq2
diff deseq2.R @ 17:d9e5cadc7f0b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
author | iuc |
---|---|
date | Wed, 05 Sep 2018 15:54:03 -0400 |
parents | a416957ee305 |
children | 3bf1b3ec1ddf |
line wrap: on
line diff
--- a/deseq2.R Fri Aug 03 17:23:46 2018 -0400 +++ b/deseq2.R Wed Sep 05 15:54:03 2018 -0400 @@ -46,6 +46,7 @@ spec <- matrix(c( "quiet", "q", 0, "logical", "help", "h", 0, "logical", + "batch_factors", "", 1, "character", "outfile", "o", 1, "character", "countsfile", "n", 1, "character", "header", "H", 0, "logical", @@ -87,24 +88,13 @@ FALSE } -if (!is.null(opt$header)) { - hasHeader <- TRUE -} else { - hasHeader <- FALSE +source_local <- function(fname){ + argv <- commandArgs(trailingOnly = FALSE) + base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) + source(paste(base_dir, fname, sep="/")) } -if (!is.null(opt$tximport)) { - if (is.null(opt$tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport") - if (tolower(file_ext(opt$tx2gene)) == "gtf") { - gtfFile <- opt$tx2gene - } else { - gtfFile <- NULL - tx2gene <- read.table(opt$tx2gene, header=FALSE) - } - useTXI <- TRUE -} else { - useTXI <- FALSE -} +source_local('get_deseq_dataset.R') suppressPackageStartupMessages({ library("DESeq2") @@ -197,53 +187,34 @@ cat("\n---------------------\n") } -# For JSON input from Galaxy, path is absolute -dir <- "" - -if (!useTXI & hasHeader) { - countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)}) - tbl <- do.call("cbind", countfiles) - colnames(tbl) <- rownames(sampleTable) # take sample ids from header - - # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) - oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual", - "not_aligned", "alignment_not_unique") - specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames - tbl <- tbl[!specialRows, , drop = FALSE] - - dds <- DESeqDataSetFromMatrix(countData = tbl, - colData = sampleTable[,-c(1:2), drop=FALSE], - design = designFormula) -} else if (!useTXI & !hasHeader) { +dds <- get_deseq_dataset(sampleTable, header=opt$header, designFormula=designFormula, tximport=opt$tximport, txtype=opt$txtype, tx2gene=opt$tx2gene) - # construct the object from HTSeq files - dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, - directory = dir, - design = designFormula) - labs <- unname(unlist(filenames_to_labels[colnames(dds)])) - colnames(dds) <- labs +apply_batch_factors <- function (dds, batch_factors) { + rownames(batch_factors) <- batch_factors$identifier + batch_factors <- subset(batch_factors, select = -c(identifier, condition)) + dds_samples <- colnames(dds) + batch_samples <- rownames(batch_factors) + if (!setequal(batch_samples, dds_samples)) { + stop("Batch factor names don't correspond to input sample names, check input files") + } + dds_data <- colData(dds) + # Merge dds_data with batch_factors using indexes, which are sample names + # Set sort to False, which maintains the order in dds_data + reordered_batch <- merge(dds_data, batch_factors, by.x = 0, by.y = 0, sort=F) + batch_factors <- reordered_batch[, ncol(dds_data):ncol(reordered_batch)] + for (factor in colnames(batch_factors)) { + dds[[factor]] <- batch_factors[[factor]] + } + colnames(dds) <- reordered_batch[,1] + return(dds) +} -} else { - # construct the object using tximport - # first need to make the tx2gene table - # this takes ~2-3 minutes using Bioconductor functions - if (!is.null(gtfFile)) { - suppressPackageStartupMessages({ - library("GenomicFeatures") - }) - txdb <- makeTxDbFromGFF(gtfFile, format="gtf") - k <- keys(txdb, keytype = "GENEID") - df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME") - tx2gene <- df[, 2:1] # tx ID, then gene ID - } - library("tximport") - txiFiles <- as.character(sampleTable[,2]) - labs <- unname(unlist(filenames_to_labels[sampleTable[,1]])) - names(txiFiles) <- labs - txi <- tximport(txiFiles, type=opt$txtype, tx2gene=tx2gene) - dds <- DESeqDataSetFromTximport(txi, - sampleTable[,3:ncol(sampleTable),drop=FALSE], - designFormula) +if (!is.null(opt$batch_factors)) { + batch_factors <- read.table(opt$batch_factors, sep="\t", header=T) + dds <- apply_batch_factors(dds = dds, batch_factors = batch_factors) + batch_design <- colnames(batch_factors)[-c(1,2)] + designFormula <- as.formula(paste("~", paste(c(batch_design, rev(factors)), collapse=" + "))) + design(dds) <- designFormula } if (verbose) {