Mercurial > repos > iuc > deseq2
annotate get_deseq_dataset.R @ 20:89d26b11d452 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
author | iuc |
---|---|
date | Thu, 06 Dec 2018 15:49:22 -0500 |
parents | c56e0689e46e |
children | 0696db066a5b |
rev | line source |
---|---|
17
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
1 get_deseq_dataset <- function(sampleTable, header, designFormula, tximport, txtype, tx2gene) { |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
2 |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
3 dir <- "" |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
4 |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
5 if (!is.null(header)) { |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
6 hasHeader <- TRUE |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
7 } else { |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
8 hasHeader <- FALSE |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
9 } |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
10 |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
11 if (!is.null(tximport)) { |
19
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
12 if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport") |
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
13 if (tolower(file_ext(opt$tx2gene)) == "gff") { |
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
14 gffFile <-tx2gene |
17
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
15 } else { |
19
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
16 gffFile <- NULL |
17
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
17 tx2gene <- read.table(tx2gene, header=FALSE) |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
18 } |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
19 useTXI <- TRUE |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
20 } else { |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
21 useTXI <- FALSE |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
22 } |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
23 |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
24 if (!useTXI & hasHeader) { |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
25 countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)}) |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
26 tbl <- do.call("cbind", countfiles) |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
27 colnames(tbl) <- rownames(sampleTable) # take sample ids from header |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
28 |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
29 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
30 oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual", |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
31 "not_aligned", "alignment_not_unique") |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
32 specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
33 tbl <- tbl[!specialRows, , drop = FALSE] |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
34 |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
35 dds <- DESeqDataSetFromMatrix(countData = tbl, |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
36 colData = subset(sampleTable, select=-(filename)), |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
37 design = designFormula) |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
38 } else if (!useTXI & !hasHeader) { |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
39 |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
40 # construct the object from HTSeq files |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
41 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
42 directory = dir, |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
43 design = designFormula) |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
44 colnames(dds) <- row.names(sampleTable) |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
45 |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
46 } else { |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
47 # construct the object using tximport |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
48 library("tximport") |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
49 txiFiles <- as.character(sampleTable$filename) |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
50 labs <- row.names(sampleTable) |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
51 names(txiFiles) <- labs |
19
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
52 if (!is.null(gffFile)) { |
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
53 # first need to make the tx2gene table |
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
54 # this takes ~2-3 minutes using Bioconductor functions |
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
55 suppressPackageStartupMessages({ |
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
56 library("GenomicFeatures") |
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
57 }) |
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
58 txdb <- makeTxDbFromGFF(gffFile) |
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
59 k <- keys(txdb, keytype = "TXNAME") |
20
89d26b11d452
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
iuc
parents:
19
diff
changeset
|
60 tx2gene <- select(txdb, keys=k, columns="GENEID", keytype="TXNAME") |
89d26b11d452
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
iuc
parents:
19
diff
changeset
|
61 # Remove 'transcript:' from transcript IDs (when gffFile is a GFF3 from Ensembl and the transcript does not have a Name) |
89d26b11d452
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
iuc
parents:
19
diff
changeset
|
62 tx2gene$TXNAME <- sub('^transcript:', '', tx2gene$TXNAME) |
19
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
63 } |
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
64 try(txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)) |
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
65 if (!exists("txi")) { |
20
89d26b11d452
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
iuc
parents:
19
diff
changeset
|
66 # Remove version from transcript IDs in tx2gene... |
89d26b11d452
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
iuc
parents:
19
diff
changeset
|
67 tx2gene$TXNAME <- sub('\\.[0-9]+$', '', tx2gene$TXNAME) |
89d26b11d452
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
iuc
parents:
19
diff
changeset
|
68 # ...and in txiFiles |
89d26b11d452
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
iuc
parents:
19
diff
changeset
|
69 txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene, ignoreTxVersion=TRUE) |
19
c56e0689e46e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents:
17
diff
changeset
|
70 } |
17
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
71 dds <- DESeqDataSetFromTximport(txi, |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
72 subset(sampleTable, select=-c(filename)), |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
73 designFormula) |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
74 } |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
75 return(dds) |
d9e5cadc7f0b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff
changeset
|
76 } |