comparison get_deseq_dataset.R @ 25:de44f8eff84a draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
author iuc
date Wed, 25 Nov 2020 18:36:55 +0000
parents 0696db066a5b
children 8fe98f7094de
comparison
equal deleted inserted replaced
24:71bacea10eee 25:de44f8eff84a
1 get_deseq_dataset <- function(sampleTable, header, designFormula, tximport, txtype, tx2gene) { 1 get_deseq_dataset <- function(sample_table, header, design_formula, tximport, txtype, tx2gene) {
2 2
3 dir <- "" 3 dir <- ""
4 4
5 if (!is.null(header)) { 5 has_header <- !is.null(header)
6 hasHeader <- TRUE 6 use_txi <- !is.null(tximport)
7 } else { 7 if (use_txi) {
8 hasHeader <- FALSE 8 if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport")
9 if (tolower(file_ext(tx2gene)) == "gff") {
10 gff_file <- tx2gene
11 } else {
12 gff_file <- NULL
13 tx2gene <- read.table(tx2gene, header = has_header)
14 }
9 } 15 }
10 16
11 if (!is.null(tximport)) { 17 if (!use_txi & has_header) {
12 if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport") 18 countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1)
13 if (tolower(file_ext(tx2gene)) == "gff") {
14 gffFile <-tx2gene
15 } else {
16 gffFile <- NULL
17 tx2gene <- read.table(tx2gene, header=hasHeader)
18 }
19 useTXI <- TRUE
20 } else {
21 useTXI <- FALSE
22 }
23
24 if (!useTXI & hasHeader) {
25 countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)})
26 tbl <- do.call("cbind", countfiles) 19 tbl <- do.call("cbind", countfiles)
27 colnames(tbl) <- rownames(sampleTable) # take sample ids from header 20 colnames(tbl) <- rownames(sample_table) # take sample ids from header
28 21
29 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) 22 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function)
30 oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual", 23 old_special_names <- c(
31 "not_aligned", "alignment_not_unique") 24 "no_feature",
32 specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames 25 "ambiguous",
33 tbl <- tbl[!specialRows, , drop = FALSE] 26 "too_low_aQual",
27 "not_aligned",
28 "alignment_not_unique"
29 )
30 special_rows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% old_special_names
31 tbl <- tbl[!special_rows, , drop = FALSE]
34 32
35 dds <- DESeqDataSetFromMatrix(countData = tbl, 33 dds <- DESeqDataSetFromMatrix(
36 colData = subset(sampleTable, select=-(filename)), 34 countData = tbl,
37 design = designFormula) 35 colData = subset(sample_table, select = -filename),
38 } else if (!useTXI & !hasHeader) { 36 design = design_formula
37 )
38 } else if (!use_txi & !has_header) {
39 39
40 # construct the object from HTSeq files 40 # construct the object from HTSeq files
41 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, 41 dds <- DESeqDataSetFromHTSeqCount(
42 directory = dir, 42 sampleTable = sample_table,
43 design = designFormula) 43 directory = dir,
44 colnames(dds) <- row.names(sampleTable) 44 design = design_formula
45 )
46 colnames(dds) <- row.names(sample_table)
45 47
46 } else { 48 } else {
47 # construct the object using tximport 49 # construct the object using tximport
48 library("tximport") 50 library("tximport")
49 txiFiles <- as.character(sampleTable$filename) 51 txi_files <- as.character(sample_table$filename)
50 labs <- row.names(sampleTable) 52 labs <- row.names(sample_table)
51 names(txiFiles) <- labs 53 names(txi_files) <- labs
52 if (!is.null(gffFile)) { 54 if (!is.null(gff_file)) {
53 # first need to make the tx2gene table 55 # first need to make the tx2gene table
54 # this takes ~2-3 minutes using Bioconductor functions 56 # this takes ~2-3 minutes using Bioconductor functions
55 suppressPackageStartupMessages({ 57 suppressPackageStartupMessages({
56 library("GenomicFeatures") 58 library("GenomicFeatures")
57 }) 59 })
58 txdb <- makeTxDbFromGFF(gffFile) 60 txdb <- makeTxDbFromGFF(gff_file)
59 k <- keys(txdb, keytype = "TXNAME") 61 k <- keys(txdb, keytype = "TXNAME")
60 tx2gene <- select(txdb, keys=k, columns="GENEID", keytype="TXNAME") 62 tx2gene <- select(txdb, keys = k, columns = "GENEID", keytype = "TXNAME")
61 # Remove 'transcript:' from transcript IDs (when gffFile is a GFF3 from Ensembl and the transcript does not have a Name) 63 # Remove 'transcript:' from transcript IDs (when gff_file is a GFF3 from Ensembl and the transcript does not have a Name)
62 tx2gene$TXNAME <- sub('^transcript:', '', tx2gene$TXNAME) 64 tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME) # nolint
63 } 65 }
64 try(txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)) 66 try(txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene))
65 if (!exists("txi")) { 67 if (!exists("txi")) {
66 # Remove version from transcript IDs in tx2gene... 68 # Remove version from transcript IDs in tx2gene...
67 tx2gene$TXNAME <- sub('\\.[0-9]+$', '', tx2gene$TXNAME) 69 tx2gene$TXNAME <- sub("\\.[0-9]+$", "", tx2gene$TXNAME) # nolint
68 # ...and in txiFiles 70 # ...and in txi_files
69 txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene, ignoreTxVersion=TRUE) 71 txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene, ignoreTxVersion = TRUE)
70 } 72 }
71 dds <- DESeqDataSetFromTximport(txi, 73 dds <- DESeqDataSetFromTximport(
72 subset(sampleTable, select=-c(filename)), 74 txi,
73 designFormula) 75 subset(sample_table, select = -c(filename)),
76 design_formula
77 )
74 } 78 }
75 return(dds) 79 return(dds)
76 } 80 }