annotate get_deseq_dataset.R @ 29:cd9874cb9019 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit cbeb1c4c436be04323bd9a809a6393d00b168d07"
author iuc
date Mon, 29 Nov 2021 18:16:48 +0000
parents de44f8eff84a
children 8fe98f7094de
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
25
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
1 get_deseq_dataset <- function(sample_table, header, design_formula, tximport, txtype, tx2gene) {
17
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
2
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
3 dir <- ""
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
4
25
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
5 has_header <- !is.null(header)
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
6 use_txi <- !is.null(tximport)
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
7 if (use_txi) {
19
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
8 if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport")
23
0696db066a5b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9ed3d83cc447ee897af867362bf1dd67af8a11c2
iuc
parents: 20
diff changeset
9 if (tolower(file_ext(tx2gene)) == "gff") {
25
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
10 gff_file <- tx2gene
17
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
11 } else {
25
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
12 gff_file <- NULL
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
13 tx2gene <- read.table(tx2gene, header = has_header)
17
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
14 }
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
15 }
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
16
25
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
17 if (!use_txi & has_header) {
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
18 countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1)
17
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
19 tbl <- do.call("cbind", countfiles)
25
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
20 colnames(tbl) <- rownames(sample_table) # take sample ids from header
17
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
21
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
22 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function)
25
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
23 old_special_names <- c(
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
24 "no_feature",
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
25 "ambiguous",
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
26 "too_low_aQual",
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
27 "not_aligned",
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
28 "alignment_not_unique"
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
29 )
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
30 special_rows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% old_special_names
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
31 tbl <- tbl[!special_rows, , drop = FALSE]
17
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
32
25
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
33 dds <- DESeqDataSetFromMatrix(
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
34 countData = tbl,
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
35 colData = subset(sample_table, select = -filename),
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
36 design = design_formula
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
37 )
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
38 } else if (!use_txi & !has_header) {
17
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
39
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
40 # construct the object from HTSeq files
25
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
41 dds <- DESeqDataSetFromHTSeqCount(
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
42 sampleTable = sample_table,
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
43 directory = dir,
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
44 design = design_formula
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
45 )
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
46 colnames(dds) <- row.names(sample_table)
17
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
47
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
48 } else {
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
49 # construct the object using tximport
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
50 library("tximport")
25
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
51 txi_files <- as.character(sample_table$filename)
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
52 labs <- row.names(sample_table)
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
53 names(txi_files) <- labs
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
54 if (!is.null(gff_file)) {
19
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
55 # first need to make the tx2gene table
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
56 # this takes ~2-3 minutes using Bioconductor functions
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
57 suppressPackageStartupMessages({
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
58 library("GenomicFeatures")
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
59 })
25
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
60 txdb <- makeTxDbFromGFF(gff_file)
19
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
61 k <- keys(txdb, keytype = "TXNAME")
25
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
62 tx2gene <- select(txdb, keys = k, columns = "GENEID", keytype = "TXNAME")
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
63 # Remove 'transcript:' from transcript IDs (when gff_file is a GFF3 from Ensembl and the transcript does not have a Name)
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
64 tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME) # nolint
19
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
65 }
25
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
66 try(txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene))
19
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
67 if (!exists("txi")) {
20
89d26b11d452 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
iuc
parents: 19
diff changeset
68 # Remove version from transcript IDs in tx2gene...
25
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
69 tx2gene$TXNAME <- sub("\\.[0-9]+$", "", tx2gene$TXNAME) # nolint
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
70 # ...and in txi_files
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
71 txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene, ignoreTxVersion = TRUE)
19
c56e0689e46e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
iuc
parents: 17
diff changeset
72 }
25
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
73 dds <- DESeqDataSetFromTximport(
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
74 txi,
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
75 subset(sample_table, select = -c(filename)),
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
76 design_formula
de44f8eff84a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 23
diff changeset
77 )
17
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
78 }
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
79 return(dds)
d9e5cadc7f0b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
iuc
parents:
diff changeset
80 }