diff get_deseq_dataset.R @ 17:d9e5cadc7f0b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
author iuc
date Wed, 05 Sep 2018 15:54:03 -0400
parents
children c56e0689e46e
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_deseq_dataset.R	Wed Sep 05 15:54:03 2018 -0400
@@ -0,0 +1,69 @@
+get_deseq_dataset <- function(sampleTable, header, designFormula, tximport, txtype, tx2gene) {
+
+  dir <- ""
+
+  if (!is.null(header)) {
+    hasHeader <- TRUE
+  } else {
+    hasHeader <- FALSE
+  }
+
+  if (!is.null(tximport)) {
+    if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport")
+    if (tolower(file_ext(opt$tx2gene)) == "gtf") {
+      gtfFile <-tx2gene
+    } else {
+      gtfFile <- NULL
+      tx2gene <- read.table(tx2gene, header=FALSE)
+    }
+    useTXI <- TRUE
+  } else {
+    useTXI <- FALSE
+  }
+
+  if (!useTXI & hasHeader) {
+      countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)})
+      tbl <- do.call("cbind", countfiles)
+      colnames(tbl) <- rownames(sampleTable) # take sample ids from header
+
+      # check for htseq report lines (from DESeqDataSetFromHTSeqCount function)
+      oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual",
+          "not_aligned", "alignment_not_unique")
+      specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames
+      tbl <- tbl[!specialRows, , drop = FALSE]
+
+      dds <- DESeqDataSetFromMatrix(countData = tbl,
+                                    colData = subset(sampleTable, select=-(filename)),
+                                    design = designFormula)
+  } else if (!useTXI & !hasHeader) {
+
+    # construct the object from HTSeq files
+    dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
+                                      directory = dir,
+                                      design =  designFormula)
+    colnames(dds) <- row.names(sampleTable)
+
+  } else {
+      # construct the object using tximport
+      # first need to make the tx2gene table
+      # this takes ~2-3 minutes using Bioconductor functions
+      if (!is.null(gtfFile)) {
+        suppressPackageStartupMessages({
+          library("GenomicFeatures")
+        })
+        txdb <- makeTxDbFromGFF(gtfFile, format="gtf")
+        k <- keys(txdb, keytype = "GENEID")
+        df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME")
+        tx2gene <- df[, 2:1]  # tx ID, then gene ID
+      }
+      library("tximport")
+      txiFiles <- as.character(sampleTable$filename)
+      labs <- row.names(sampleTable)
+      names(txiFiles) <- labs
+      txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)
+      dds <- DESeqDataSetFromTximport(txi,
+                                      subset(sampleTable, select=-c(filename)),
+                                      designFormula)
+  }
+  return(dds)
+}