Mercurial > repos > iuc > deseq2
annotate get_deseq_dataset.R @ 26:6a3a025714d3 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 0b502ee6bfa3e861c8678cabfeb670dc8dfe9519"
author | iuc |
---|---|
date | Fri, 08 Jan 2021 20:29:54 +0000 |
parents | de44f8eff84a |
children | 8fe98f7094de |
rev | line source |
---|---|
25
de44f8eff84a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents:
23
diff
changeset
|
# Build a DESeqDataSet from per-sample input files.
#
# Three input modes are supported, selected by `header` and `tximport`:
#   * tximport is NULL, header is not NULL: each file is read with read.delim()
#     (first column used as row names), the tables are column-bound and passed
#     to DESeqDataSetFromMatrix().
#   * tximport is NULL, header is NULL: the files are handed to
#     DESeqDataSetFromHTSeqCount().
#   * tximport is not NULL: the files are imported with tximport() and passed
#     to DESeqDataSetFromTximport().
#
# Arguments:
#   sample_table    data frame with one row per sample; row names are used as
#                   sample ids and the `filename` column holds the input path.
#                   In the matrix and tximport modes the remaining columns are
#                   used as the column data.
#   header          NULL when the input files have no header line, anything
#                   else when they do.
#   design_formula  design passed through to the DESeq2 constructor.
#   tximport        NULL to read count files directly, anything else to import
#                   the files with tximport().
#   txtype          passed as `type` to tximport().
#   tx2gene         path to a transcript-to-gene table, or to an annotation file
#                   whose extension is "gff" (case-insensitive), from which the
#                   mapping is built with GenomicFeatures. Required in tximport
#                   mode.
#
# Returns the constructed DESeqDataSet.
get_deseq_dataset <- function(sample_table, header, design_formula, tximport, txtype, tx2gene) {

  dir <- ""

  has_header <- !is.null(header)
  use_txi <- !is.null(tximport)

  # Always defined so the tximport branch below can test it safely.
  gff_file <- NULL
  if (use_txi) {
    if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport")
    if (tolower(tools::file_ext(tx2gene)) == "gff") {
      gff_file <- tx2gene
    } else {
      tx2gene <- read.table(tx2gene, header = has_header)
    }
  }

  if (!use_txi && has_header) {
    countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1)
    tbl <- do.call("cbind", countfiles)
    colnames(tbl) <- rownames(sample_table) # take sample ids from header

    # check for htseq report lines (from DESeqDataSetFromHTSeqCount function)
    old_special_names <- c(
      "no_feature",
      "ambiguous",
      "too_low_aQual",
      "not_aligned",
      "alignment_not_unique"
    )
    special_rows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% old_special_names
    tbl <- tbl[!special_rows, , drop = FALSE]

    dds <- DESeqDataSetFromMatrix(
      countData = tbl,
      colData = subset(sample_table, select = -filename),
      design = design_formula
    )
  } else if (!use_txi && !has_header) {

    # construct the object from HTSeq files
    dds <- DESeqDataSetFromHTSeqCount(
      sampleTable = sample_table,
      directory = dir,
      design = design_formula
    )
    colnames(dds) <- row.names(sample_table)

  } else {
    # construct the object using tximport
    library("tximport")
    txi_files <- as.character(sample_table$filename)
    names(txi_files) <- row.names(sample_table)

    if (!is.null(gff_file)) {
      # first need to make the tx2gene table
      # this takes ~2-3 minutes using Bioconductor functions
      suppressPackageStartupMessages({
        library("GenomicFeatures")
      })
      txdb <- makeTxDbFromGFF(gff_file)
      k <- keys(txdb, keytype = "TXNAME")
      tx2gene <- select(txdb, keys = k, columns = "GENEID", keytype = "TXNAME")
      # Remove 'transcript:' from transcript IDs (when gff_file is a GFF3 from
      # Ensembl and the transcript does not have a Name)
      tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME) # nolint
    }

    # The `tximport` argument shadows the package function by name only: R skips
    # non-function bindings when resolving a call, so this still calls
    # tximport::tximport().
    # tryCatch is used instead of try() + exists("txi") because exists() also
    # looks in enclosing environments and could pick up an unrelated `txi`.
    txi <- tryCatch(
      tximport(txi_files, type = txtype, tx2gene = tx2gene),
      error = function(e) {
        message(
          "tximport failed: ", conditionMessage(e),
          "\nRetrying with transcript versions ignored"
        )
        NULL
      }
    )
    if (is.null(txi)) {
      # Remove version from transcript IDs in tx2gene...
      # The first column holds the transcript ids, whether the table came from
      # the GFF (TXNAME) or from a user-supplied file with arbitrary or no
      # column names.
      tx2gene[[1]] <- sub("\\.[0-9]+$", "", tx2gene[[1]])
      # ...and in txi_files
      txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene, ignoreTxVersion = TRUE)
    }

    dds <- DESeqDataSetFromTximport(
      txi,
      subset(sample_table, select = -c(filename)),
      design_formula
    )
  }
  dds
}