Mercurial > repos > iuc > deseq2
comparison get_deseq_dataset.R @ 25:de44f8eff84a draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
author | iuc |
---|---|
date | Wed, 25 Nov 2020 18:36:55 +0000 |
parents | 0696db066a5b |
children | 8fe98f7094de |
comparison
equal
deleted
inserted
replaced
24:71bacea10eee | 25:de44f8eff84a |
---|---|
1 get_deseq_dataset <- function(sampleTable, header, designFormula, tximport, txtype, tx2gene) { | 1 get_deseq_dataset <- function(sample_table, header, design_formula, tximport, txtype, tx2gene) { |
2 | 2 |
3 dir <- "" | 3 dir <- "" |
4 | 4 |
5 if (!is.null(header)) { | 5 has_header <- !is.null(header) |
6 hasHeader <- TRUE | 6 use_txi <- !is.null(tximport) |
7 } else { | 7 if (use_txi) { |
8 hasHeader <- FALSE | 8 if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport") |
9 if (tolower(file_ext(tx2gene)) == "gff") { | |
10 gff_file <- tx2gene | |
11 } else { | |
12 gff_file <- NULL | |
13 tx2gene <- read.table(tx2gene, header = has_header) | |
14 } | |
9 } | 15 } |
10 | 16 |
11 if (!is.null(tximport)) { | 17 if (!use_txi & has_header) { |
12 if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport") | 18 countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1) |
13 if (tolower(file_ext(tx2gene)) == "gff") { | |
14 gffFile <-tx2gene | |
15 } else { | |
16 gffFile <- NULL | |
17 tx2gene <- read.table(tx2gene, header=hasHeader) | |
18 } | |
19 useTXI <- TRUE | |
20 } else { | |
21 useTXI <- FALSE | |
22 } | |
23 | |
24 if (!useTXI & hasHeader) { | |
25 countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)}) | |
26 tbl <- do.call("cbind", countfiles) | 19 tbl <- do.call("cbind", countfiles) |
27 colnames(tbl) <- rownames(sampleTable) # take sample ids from header | 20 colnames(tbl) <- rownames(sample_table) # take sample ids from header |
28 | 21 |
29 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) | 22 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) |
30 oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual", | 23 old_special_names <- c( |
31 "not_aligned", "alignment_not_unique") | 24 "no_feature", |
32 specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames | 25 "ambiguous", |
33 tbl <- tbl[!specialRows, , drop = FALSE] | 26 "too_low_aQual", |
27 "not_aligned", | |
28 "alignment_not_unique" | |
29 ) | |
30 special_rows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% old_special_names | |
31 tbl <- tbl[!special_rows, , drop = FALSE] | |
34 | 32 |
35 dds <- DESeqDataSetFromMatrix(countData = tbl, | 33 dds <- DESeqDataSetFromMatrix( |
36 colData = subset(sampleTable, select=-(filename)), | 34 countData = tbl, |
37 design = designFormula) | 35 colData = subset(sample_table, select = -filename), |
38 } else if (!useTXI & !hasHeader) { | 36 design = design_formula |
37 ) | |
38 } else if (!use_txi & !has_header) { | |
39 | 39 |
40 # construct the object from HTSeq files | 40 # construct the object from HTSeq files |
41 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, | 41 dds <- DESeqDataSetFromHTSeqCount( |
42 directory = dir, | 42 sampleTable = sample_table, |
43 design = designFormula) | 43 directory = dir, |
44 colnames(dds) <- row.names(sampleTable) | 44 design = design_formula |
45 ) | |
46 colnames(dds) <- row.names(sample_table) | |
45 | 47 |
46 } else { | 48 } else { |
47 # construct the object using tximport | 49 # construct the object using tximport |
48 library("tximport") | 50 library("tximport") |
49 txiFiles <- as.character(sampleTable$filename) | 51 txi_files <- as.character(sample_table$filename) |
50 labs <- row.names(sampleTable) | 52 labs <- row.names(sample_table) |
51 names(txiFiles) <- labs | 53 names(txi_files) <- labs |
52 if (!is.null(gffFile)) { | 54 if (!is.null(gff_file)) { |
53 # first need to make the tx2gene table | 55 # first need to make the tx2gene table |
54 # this takes ~2-3 minutes using Bioconductor functions | 56 # this takes ~2-3 minutes using Bioconductor functions |
55 suppressPackageStartupMessages({ | 57 suppressPackageStartupMessages({ |
56 library("GenomicFeatures") | 58 library("GenomicFeatures") |
57 }) | 59 }) |
58 txdb <- makeTxDbFromGFF(gffFile) | 60 txdb <- makeTxDbFromGFF(gff_file) |
59 k <- keys(txdb, keytype = "TXNAME") | 61 k <- keys(txdb, keytype = "TXNAME") |
60 tx2gene <- select(txdb, keys=k, columns="GENEID", keytype="TXNAME") | 62 tx2gene <- select(txdb, keys = k, columns = "GENEID", keytype = "TXNAME") |
61 # Remove 'transcript:' from transcript IDs (when gffFile is a GFF3 from Ensembl and the transcript does not have a Name) | 63 # Remove 'transcript:' from transcript IDs (when gff_file is a GFF3 from Ensembl and the transcript does not have a Name) |
62 tx2gene$TXNAME <- sub('^transcript:', '', tx2gene$TXNAME) | 64 tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME) # nolint |
63 } | 65 } |
64 try(txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)) | 66 try(txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene)) |
65 if (!exists("txi")) { | 67 if (!exists("txi")) { |
66 # Remove version from transcript IDs in tx2gene... | 68 # Remove version from transcript IDs in tx2gene... |
67 tx2gene$TXNAME <- sub('\\.[0-9]+$', '', tx2gene$TXNAME) | 69 tx2gene$TXNAME <- sub("\\.[0-9]+$", "", tx2gene$TXNAME) # nolint |
68 # ...and in txiFiles | 70 # ...and in txi_files |
69 txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene, ignoreTxVersion=TRUE) | 71 txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene, ignoreTxVersion = TRUE) |
70 } | 72 } |
71 dds <- DESeqDataSetFromTximport(txi, | 73 dds <- DESeqDataSetFromTximport( |
72 subset(sampleTable, select=-c(filename)), | 74 txi, |
73 designFormula) | 75 subset(sample_table, select = -c(filename)), |
76 design_formula | |
77 ) | |
74 } | 78 } |
75 return(dds) | 79 return(dds) |
76 } | 80 } |