diff get_deseq_dataset.R @ 25:de44f8eff84a draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
author iuc
date Wed, 25 Nov 2020 18:36:55 +0000
parents 0696db066a5b
children 8fe98f7094de
line wrap: on
line diff
--- a/get_deseq_dataset.R	Sat Feb 29 17:08:08 2020 -0500
+++ b/get_deseq_dataset.R	Wed Nov 25 18:36:55 2020 +0000
@@ -1,76 +1,80 @@
-get_deseq_dataset <- function(sampleTable, header, designFormula, tximport, txtype, tx2gene) {
+get_deseq_dataset <- function(sample_table, header, design_formula, tximport, txtype, tx2gene) {
 
   dir <- ""
 
-  if (!is.null(header)) {
-    hasHeader <- TRUE
-  } else {
-    hasHeader <- FALSE
-  }
-
-  if (!is.null(tximport)) {
+  has_header <- !is.null(header)
+  use_txi <- !is.null(tximport)
+  if (use_txi) {
     if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport")
     if (tolower(file_ext(tx2gene)) == "gff") {
-      gffFile <-tx2gene
+      gff_file <- tx2gene
     } else {
-      gffFile <- NULL
-      tx2gene <- read.table(tx2gene, header=hasHeader)
+      gff_file <- NULL
+      tx2gene <- read.table(tx2gene, header = has_header)
     }
-    useTXI <- TRUE
-  } else {
-    useTXI <- FALSE
   }
 
-  if (!useTXI & hasHeader) {
-      countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)})
+  if (!use_txi & has_header) {
+      countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1)
       tbl <- do.call("cbind", countfiles)
-      colnames(tbl) <- rownames(sampleTable) # take sample ids from header
+      colnames(tbl) <- rownames(sample_table) # take sample ids from header
 
       # check for htseq report lines (from DESeqDataSetFromHTSeqCount function)
-      oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual",
-          "not_aligned", "alignment_not_unique")
-      specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames
-      tbl <- tbl[!specialRows, , drop = FALSE]
+      old_special_names <- c(
+        "no_feature",
+        "ambiguous",
+        "too_low_aQual",
+        "not_aligned",
+        "alignment_not_unique"
+      )
+      special_rows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% old_special_names
+      tbl <- tbl[!special_rows, , drop = FALSE]
 
-      dds <- DESeqDataSetFromMatrix(countData = tbl,
-                                    colData = subset(sampleTable, select=-(filename)),
-                                    design = designFormula)
-  } else if (!useTXI & !hasHeader) {
+      dds <- DESeqDataSetFromMatrix(
+        countData = tbl,
+        colData = subset(sample_table, select = -filename),
+        design = design_formula
+      )
+  } else if (!use_txi & !has_header) {
 
     # construct the object from HTSeq files
-    dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
-                                      directory = dir,
-                                      design =  designFormula)
-    colnames(dds) <- row.names(sampleTable)
+    dds <- DESeqDataSetFromHTSeqCount(
+      sampleTable = sample_table,
+      directory = dir,
+      design = design_formula
+    )
+    colnames(dds) <- row.names(sample_table)
 
   } else {
       # construct the object using tximport
       library("tximport")
-      txiFiles <- as.character(sampleTable$filename)
-      labs <- row.names(sampleTable)
-      names(txiFiles) <- labs
-      if (!is.null(gffFile)) {
+      txi_files <- as.character(sample_table$filename)
+      labs <- row.names(sample_table)
+      names(txi_files) <- labs
+      if (!is.null(gff_file)) {
         # first need to make the tx2gene table
         # this takes ~2-3 minutes using Bioconductor functions
         suppressPackageStartupMessages({
           library("GenomicFeatures")
         })
-        txdb <- makeTxDbFromGFF(gffFile)
+        txdb <- makeTxDbFromGFF(gff_file)
         k <- keys(txdb, keytype = "TXNAME")
-        tx2gene <- select(txdb, keys=k, columns="GENEID", keytype="TXNAME")
-        # Remove 'transcript:' from transcript IDs (when gffFile is a GFF3 from Ensembl and the transcript does not have a Name)
-        tx2gene$TXNAME <- sub('^transcript:', '', tx2gene$TXNAME)
+        tx2gene <- select(txdb, keys = k, columns = "GENEID", keytype = "TXNAME")
+        # Remove 'transcript:' from transcript IDs (when gff_file is a GFF3 from Ensembl and the transcript does not have a Name)
+        tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME)  # nolint
       }
-      try(txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene))
+      try(txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene))
       if (!exists("txi")) {
         # Remove version from transcript IDs in tx2gene...
-        tx2gene$TXNAME <- sub('\\.[0-9]+$', '', tx2gene$TXNAME)
-        # ...and in txiFiles
-        txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene, ignoreTxVersion=TRUE)
+        tx2gene$TXNAME <- sub("\\.[0-9]+$", "", tx2gene$TXNAME)  # nolint
+        # ...and in txi_files
+        txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene, ignoreTxVersion = TRUE)
       }
-      dds <- DESeqDataSetFromTximport(txi,
-                                      subset(sampleTable, select=-c(filename)),
-                                      designFormula)
+      dds <- DESeqDataSetFromTximport(
+        txi,
+        subset(sample_table, select = -c(filename)),
+        design_formula
+      )
   }
   return(dds)
 }