comparison get_deseq_dataset.R @ 17:d9e5cadc7f0b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
author iuc
date Wed, 05 Sep 2018 15:54:03 -0400
parents
children c56e0689e46e
comparison
equal deleted inserted replaced
16:a416957ee305 17:d9e5cadc7f0b
1 get_deseq_dataset <- function(sampleTable, header, designFormula, tximport, txtype, tx2gene) {
2
3 dir <- ""
4
5 if (!is.null(header)) {
6 hasHeader <- TRUE
7 } else {
8 hasHeader <- FALSE
9 }
10
11 if (!is.null(tximport)) {
12 if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport")
13 if (tolower(file_ext(opt$tx2gene)) == "gtf") {
14 gtfFile <-tx2gene
15 } else {
16 gtfFile <- NULL
17 tx2gene <- read.table(tx2gene, header=FALSE)
18 }
19 useTXI <- TRUE
20 } else {
21 useTXI <- FALSE
22 }
23
24 if (!useTXI & hasHeader) {
25 countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)})
26 tbl <- do.call("cbind", countfiles)
27 colnames(tbl) <- rownames(sampleTable) # take sample ids from header
28
29 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function)
30 oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual",
31 "not_aligned", "alignment_not_unique")
32 specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames
33 tbl <- tbl[!specialRows, , drop = FALSE]
34
35 dds <- DESeqDataSetFromMatrix(countData = tbl,
36 colData = subset(sampleTable, select=-(filename)),
37 design = designFormula)
38 } else if (!useTXI & !hasHeader) {
39
40 # construct the object from HTSeq files
41 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
42 directory = dir,
43 design = designFormula)
44 colnames(dds) <- row.names(sampleTable)
45
46 } else {
47 # construct the object using tximport
48 # first need to make the tx2gene table
49 # this takes ~2-3 minutes using Bioconductor functions
50 if (!is.null(gtfFile)) {
51 suppressPackageStartupMessages({
52 library("GenomicFeatures")
53 })
54 txdb <- makeTxDbFromGFF(gtfFile, format="gtf")
55 k <- keys(txdb, keytype = "GENEID")
56 df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME")
57 tx2gene <- df[, 2:1] # tx ID, then gene ID
58 }
59 library("tximport")
60 txiFiles <- as.character(sampleTable$filename)
61 labs <- row.names(sampleTable)
62 names(txiFiles) <- labs
63 txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)
64 dds <- DESeqDataSetFromTximport(txi,
65 subset(sampleTable, select=-c(filename)),
66 designFormula)
67 }
68 return(dds)
69 }