annotate get_deseq_dataset.R @ 23:0696db066a5b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9ed3d83cc447ee897af867362bf1dd67af8a11c2
author iuc
date Tue, 26 Mar 2019 06:25:00 -0400
parents 89d26b11d452
children de44f8eff84a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
17
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
1 get_deseq_dataset <- function(sampleTable, header, designFormula, tximport, txtype, tx2gene) {
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
2
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
3 dir <- ""
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
4
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
5 if (!is.null(header)) {
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
6 hasHeader <- TRUE
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
7 } else {
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
8 hasHeader <- FALSE
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
9 }
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
10
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
11 if (!is.null(tximport)) {
19
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
12 if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport")
23
0696db066a5b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9ed3d83cc447ee897af867362bf1dd67af8a11c2
iuc
parents: 20
diff changeset
13 if (tolower(file_ext(tx2gene)) == "gff") {
19
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
14 gffFile <-tx2gene
17
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
15 } else {
19
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
16 gffFile <- NULL
23
0696db066a5b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9ed3d83cc447ee897af867362bf1dd67af8a11c2
iuc
parents: 20
diff changeset
17 tx2gene <- read.table(tx2gene, header=hasHeader)
17
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
18 }
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
19 useTXI <- TRUE
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
20 } else {
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
21 useTXI <- FALSE
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
22 }
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
23
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
24 if (!useTXI & hasHeader) {
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
25 countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)})
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
26 tbl <- do.call("cbind", countfiles)
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
27 colnames(tbl) <- rownames(sampleTable) # take sample ids from header
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
28
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
29 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function)
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
30 oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual",
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
31 "not_aligned", "alignment_not_unique")
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
32 specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
33 tbl <- tbl[!specialRows, , drop = FALSE]
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
34
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
35 dds <- DESeqDataSetFromMatrix(countData = tbl,
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
36 colData = subset(sampleTable, select=-(filename)),
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
37 design = designFormula)
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
38 } else if (!useTXI & !hasHeader) {
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
39
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
40 # construct the object from HTSeq files
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
41 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
42 directory = dir,
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
43 design = designFormula)
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
44 colnames(dds) <- row.names(sampleTable)
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
45
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
46 } else {
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
47 # construct the object using tximport
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
48 library("tximport")
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
49 txiFiles <- as.character(sampleTable$filename)
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
50 labs <- row.names(sampleTable)
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
51 names(txiFiles) <- labs
19
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
52 if (!is.null(gffFile)) {
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
53 # first need to make the tx2gene table
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
54 # this takes ~2-3 minutes using Bioconductor functions
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
55 suppressPackageStartupMessages({
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
56 library("GenomicFeatures")
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
57 })
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
58 txdb <- makeTxDbFromGFF(gffFile)
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
59 k <- keys(txdb, keytype = "TXNAME")
20
89d26b11d452 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
iuc
parents: 19
diff changeset
60 tx2gene <- select(txdb, keys=k, columns="GENEID", keytype="TXNAME")
89d26b11d452 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
iuc
parents: 19
diff changeset
61 # Remove 'transcript:' from transcript IDs (when gffFile is a GFF3 from Ensembl and the transcript does not have a Name)
89d26b11d452 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
iuc
parents: 19
diff changeset
62 tx2gene$TXNAME <- sub('^transcript:', '', tx2gene$TXNAME)
19
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
63 }
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
64 try(txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene))
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
65 if (!exists("txi")) {
20
89d26b11d452 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
iuc
parents: 19
diff changeset
66 # Remove version from transcript IDs in tx2gene...
89d26b11d452 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
iuc
parents: 19
diff changeset
67 tx2gene$TXNAME <- sub('\\.[0-9]+$', '', tx2gene$TXNAME)
89d26b11d452 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
iuc
parents: 19
diff changeset
68 # ...and in txiFiles
89d26b11d452 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
iuc
parents: 19
diff changeset
69 txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene, ignoreTxVersion=TRUE)
19
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
70 }
17
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
71 dds <- DESeqDataSetFromTximport(txi,
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
72 subset(sampleTable, select=-c(filename)),
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
73 designFormula)
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
74 }
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
75 return(dds)
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
76 }