diff deseq2.R @ 15:9a616afdbda5 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 83eb5b2665d87c02b270596f8175499e69061032
author iuc
date Sat, 19 May 2018 03:55:48 -0400
parents d0c39b5e78cf
children a416957ee305
line wrap: on
line diff
--- a/deseq2.R	Thu Apr 12 17:29:45 2018 -0400
+++ b/deseq2.R	Sat May 19 03:55:48 2018 -0400
@@ -8,14 +8,14 @@
 #   'factors' a JSON list object from Galaxy
 #
 # the output file has columns:
-# 
+#
 #   baseMean (mean normalized count)
 #   log2FoldChange (by default a moderated LFC estimate)
 #   lfcSE (the standard error)
 #   stat (the Wald statistic)
 #   pvalue (p-value from comparison of Wald statistic to a standard Normal)
 #   padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter)
-# 
+#
 # the first variable in 'factors' will be the primary factor.
 # the levels of the primary factor are used in the order of appearance in factors.
 #
@@ -48,6 +48,7 @@
   "help", "h", 0, "logical",
   "outfile", "o", 1, "character",
   "countsfile", "n", 1, "character",
+  "header", "H", 0, "logical",
   "factors", "f", 1, "character",
   "files_to_labels", "l", 1, "character",
   "plots" , "p", 1, "character",
@@ -86,6 +87,12 @@
   FALSE
 }
 
+if (!is.null(opt$header)) {
+  hasHeader <- TRUE
+} else {
+  hasHeader <- FALSE
+}
+
 if (!is.null(opt$tximport)) {
   if (is.null(opt$tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport")
   if (tolower(file_ext(opt$tx2gene)) == "gtf") {
@@ -138,15 +145,6 @@
 primaryFactor <- factors[1]
 designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + ")))
 
-if (verbose) {
-  cat("DESeq2 run information\n\n")
-  cat("sample table:\n")
-  print(sampleTable[,-c(1:2),drop=FALSE])
-  cat("\ndesign formula:\n")
-  print(designFormula)
-  cat("\n\n")
-}
-
 # these are plots which are made once for each analysis
 generateGenericPlots <- function(dds, factors) {
   library("ggplot2")
@@ -202,13 +200,29 @@
 # For JSON input from Galaxy, path is absolute
 dir <- ""
 
-if (!useTXI) {
+if (!useTXI & hasHeader) {
+    countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)})
+    tbl <- do.call("cbind", countfiles)
+    rownames(sampleTable) <- colnames(tbl) # take sample ids from header
+
+    # check for htseq report lines (from DESeqDataSetFromHTSeqCount function)
+    oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual",
+        "not_aligned", "alignment_not_unique")
+    specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames
+    tbl <- tbl[!specialRows, , drop = FALSE]
+
+    dds <- DESeqDataSetFromMatrix(countData = tbl,
+                                  colData = sampleTable[,-c(1:2), drop=FALSE],
+                                  design = designFormula)
+} else if (!useTXI & !hasHeader) {
+
   # construct the object from HTSeq files
   dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
                                     directory = dir,
                                     design =  designFormula)
   labs <- unname(unlist(filenames_to_labels[colnames(dds)]))
   colnames(dds) <- labs
+
 } else {
     # construct the object using tximport
     # first need to make the tx2gene table
@@ -232,7 +246,16 @@
                                     designFormula)
 }
 
-if (verbose) cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n"))
+if (verbose) {
+  cat("DESeq2 run information\n\n")
+  cat("sample table:\n")
+  print(sampleTable[,-c(1:2),drop=FALSE])
+  cat("\ndesign formula:\n")
+  print(designFormula)
+  cat("\n\n")
+  cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n"))
+}
+
 # optional outlier behavior
 if (is.null(opt$outlier_replace_off)) {
   minRep <- 7
@@ -242,7 +265,7 @@
 }
 if (is.null(opt$outlier_filter_off)) {
   cooksCutoff <- TRUE
-} else {  
+} else {
   cooksCutoff <- FALSE
   if (verbose) cat("outlier filtering off\n")
 }