# HG changeset patch
# User iuc
# Date 1536177243 14400
# Node ID d9e5cadc7f0bb7603a294a9990ab286fbc243b8e
# Parent a416957ee3059a0f6b3c729b750ec3a42e0b81bc
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
diff -r a416957ee305 -r d9e5cadc7f0b deseq2.R
--- a/deseq2.R Fri Aug 03 17:23:46 2018 -0400
+++ b/deseq2.R Wed Sep 05 15:54:03 2018 -0400
@@ -46,6 +46,7 @@
spec <- matrix(c(
"quiet", "q", 0, "logical",
"help", "h", 0, "logical",
+ "batch_factors", "", 1, "character",
"outfile", "o", 1, "character",
"countsfile", "n", 1, "character",
"header", "H", 0, "logical",
@@ -87,24 +88,13 @@
FALSE
}
-if (!is.null(opt$header)) {
- hasHeader <- TRUE
-} else {
- hasHeader <- FALSE
+source_local <- function(fname){
+ argv <- commandArgs(trailingOnly = FALSE)
+ base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
+ source(paste(base_dir, fname, sep="/"))
}
-if (!is.null(opt$tximport)) {
- if (is.null(opt$tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport")
- if (tolower(file_ext(opt$tx2gene)) == "gtf") {
- gtfFile <- opt$tx2gene
- } else {
- gtfFile <- NULL
- tx2gene <- read.table(opt$tx2gene, header=FALSE)
- }
- useTXI <- TRUE
-} else {
- useTXI <- FALSE
-}
+source_local('get_deseq_dataset.R')
suppressPackageStartupMessages({
library("DESeq2")
@@ -197,53 +187,34 @@
cat("\n---------------------\n")
}
-# For JSON input from Galaxy, path is absolute
-dir <- ""
-
-if (!useTXI & hasHeader) {
- countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)})
- tbl <- do.call("cbind", countfiles)
- colnames(tbl) <- rownames(sampleTable) # take sample ids from header
-
- # check for htseq report lines (from DESeqDataSetFromHTSeqCount function)
- oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual",
- "not_aligned", "alignment_not_unique")
- specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames
- tbl <- tbl[!specialRows, , drop = FALSE]
-
- dds <- DESeqDataSetFromMatrix(countData = tbl,
- colData = sampleTable[,-c(1:2), drop=FALSE],
- design = designFormula)
-} else if (!useTXI & !hasHeader) {
+dds <- get_deseq_dataset(sampleTable, header=opt$header, designFormula=designFormula, tximport=opt$tximport, txtype=opt$txtype, tx2gene=opt$tx2gene)
- # construct the object from HTSeq files
- dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
- directory = dir,
- design = designFormula)
- labs <- unname(unlist(filenames_to_labels[colnames(dds)]))
- colnames(dds) <- labs
+apply_batch_factors <- function (dds, batch_factors) {
+ rownames(batch_factors) <- batch_factors$identifier
+ batch_factors <- subset(batch_factors, select = -c(identifier, condition))
+ dds_samples <- colnames(dds)
+ batch_samples <- rownames(batch_factors)
+ if (!setequal(batch_samples, dds_samples)) {
+ stop("Batch factor names don't correspond to input sample names, check input files")
+ }
+ dds_data <- colData(dds)
+ # Merge dds_data with batch_factors using indexes, which are sample names
+ # Set sort to False, which maintains the order in dds_data
+ reordered_batch <- merge(dds_data, batch_factors, by.x = 0, by.y = 0, sort=F)
+ batch_factors <- reordered_batch[, ncol(dds_data):ncol(reordered_batch)]
+ for (factor in colnames(batch_factors)) {
+ dds[[factor]] <- batch_factors[[factor]]
+ }
+ colnames(dds) <- reordered_batch[,1]
+ return(dds)
+}
-} else {
- # construct the object using tximport
- # first need to make the tx2gene table
- # this takes ~2-3 minutes using Bioconductor functions
- if (!is.null(gtfFile)) {
- suppressPackageStartupMessages({
- library("GenomicFeatures")
- })
- txdb <- makeTxDbFromGFF(gtfFile, format="gtf")
- k <- keys(txdb, keytype = "GENEID")
- df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME")
- tx2gene <- df[, 2:1] # tx ID, then gene ID
- }
- library("tximport")
- txiFiles <- as.character(sampleTable[,2])
- labs <- unname(unlist(filenames_to_labels[sampleTable[,1]]))
- names(txiFiles) <- labs
- txi <- tximport(txiFiles, type=opt$txtype, tx2gene=tx2gene)
- dds <- DESeqDataSetFromTximport(txi,
- sampleTable[,3:ncol(sampleTable),drop=FALSE],
- designFormula)
+if (!is.null(opt$batch_factors)) {
+ batch_factors <- read.table(opt$batch_factors, sep="\t", header=T)
+ dds <- apply_batch_factors(dds = dds, batch_factors = batch_factors)
+ batch_design <- colnames(batch_factors)[-c(1,2)]
+ designFormula <- as.formula(paste("~", paste(c(batch_design, rev(factors)), collapse=" + ")))
+ design(dds) <- designFormula
}
if (verbose) {
diff -r a416957ee305 -r d9e5cadc7f0b deseq2.xml
--- a/deseq2.xml Fri Aug 03 17:23:46 2018 -0400
+++ b/deseq2.xml Wed Sep 05 15:54:03 2018 -0400
@@ -64,6 +64,9 @@
-f '#echo json.dumps(temp_factor_names)#'
-l '#echo json.dumps(filename_to_element_identifiers)#'
-t $fit_type
+ #if $batch_factors
+ --batch_factors '$batch_factors'
+ #end if
#if $outlier_replace_off:
-a
#end if
@@ -105,7 +108,7 @@
-
+
@@ -206,6 +209,28 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r a416957ee305 -r d9e5cadc7f0b get_deseq_dataset.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_deseq_dataset.R Wed Sep 05 15:54:03 2018 -0400
@@ -0,0 +1,69 @@
+get_deseq_dataset <- function(sampleTable, header, designFormula, tximport, txtype, tx2gene) {
+
+ dir <- ""
+
+ if (!is.null(header)) {
+ hasHeader <- TRUE
+ } else {
+ hasHeader <- FALSE
+ }
+
+ if (!is.null(tximport)) {
+ if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport")
+ if (tolower(file_ext(opt$tx2gene)) == "gtf") {
+ gtfFile <-tx2gene
+ } else {
+ gtfFile <- NULL
+ tx2gene <- read.table(tx2gene, header=FALSE)
+ }
+ useTXI <- TRUE
+ } else {
+ useTXI <- FALSE
+ }
+
+ if (!useTXI & hasHeader) {
+ countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)})
+ tbl <- do.call("cbind", countfiles)
+ colnames(tbl) <- rownames(sampleTable) # take sample ids from header
+
+ # check for htseq report lines (from DESeqDataSetFromHTSeqCount function)
+ oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual",
+ "not_aligned", "alignment_not_unique")
+ specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames
+ tbl <- tbl[!specialRows, , drop = FALSE]
+
+ dds <- DESeqDataSetFromMatrix(countData = tbl,
+ colData = subset(sampleTable, select=-(filename)),
+ design = designFormula)
+ } else if (!useTXI & !hasHeader) {
+
+ # construct the object from HTSeq files
+ dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
+ directory = dir,
+ design = designFormula)
+ colnames(dds) <- row.names(sampleTable)
+
+ } else {
+ # construct the object using tximport
+ # first need to make the tx2gene table
+ # this takes ~2-3 minutes using Bioconductor functions
+ if (!is.null(gtfFile)) {
+ suppressPackageStartupMessages({
+ library("GenomicFeatures")
+ })
+ txdb <- makeTxDbFromGFF(gtfFile, format="gtf")
+ k <- keys(txdb, keytype = "GENEID")
+ df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME")
+ tx2gene <- df[, 2:1] # tx ID, then gene ID
+ }
+ library("tximport")
+ txiFiles <- as.character(sampleTable$filename)
+ labs <- row.names(sampleTable)
+ names(txiFiles) <- labs
+ txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)
+ dds <- DESeqDataSetFromTximport(txi,
+ subset(sampleTable, select=-c(filename)),
+ designFormula)
+ }
+ return(dds)
+}
diff -r a416957ee305 -r d9e5cadc7f0b test-data/batch_factors.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/batch_factors.tab Wed Sep 05 15:54:03 2018 -0400
@@ -0,0 +1,8 @@
+identifier condition W_1
+GSM461179_treat_single.counts Treatment -0.607164236699804
+GSM461180_treat_paired.counts Treatment 0.261662524312529
+GSM461181_treat_paired.counts Treatment 0.341268033254441
+GSM461176_untreat_single.counts Control -0.297792561552867
+GSM461177_untreat_paired.counts Control 0.322555604859173
+GSM461178_untreat_paired.counts Control 0.345745982909988
+GSM461182_untreat_single.counts Control -0.366275347083457
diff -r a416957ee305 -r d9e5cadc7f0b test-data/out_deseq2_sailfish.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_deseq2_sailfish.tab Wed Sep 05 15:54:03 2018 -0400
@@ -0,0 +1,4 @@
+MIR6859-2 1.1858265336986 -1.58325519934585 1.29566733255222 -1.22196119294536 0.221722302676964 0.886889210707857
+DDX11L1 1.91972630671259 -0.204757212641591 1.31524142129485 -0.155680325548148 0.876285006005361 0.999999999763967
+MIR6859-1 0.294068009390125 -0.712896459907293 1.28824575367762 -0.55338545294805 0.579999497913718 0.999999999763967
+WASH7P 114.545909292233 -3.75034693001422e-10 1.2677635335979 -2.95823852841924e-10 0.999999999763967 0.999999999763967