Mercurial > repos > iuc > deseq2
comparison deseq2.R @ 17:d9e5cadc7f0b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit b95582cea8320d5488056a9576474f79cec53be8
author | iuc |
---|---|
date | Wed, 05 Sep 2018 15:54:03 -0400 |
parents | a416957ee305 |
children | 3bf1b3ec1ddf |
comparison
equal
deleted
inserted
replaced
16:a416957ee305 | 17:d9e5cadc7f0b |
---|---|
44 # get options, using the spec as defined by the enclosed list. | 44 # get options, using the spec as defined by the enclosed list. |
45 # we read the options from the default: commandArgs(TRUE). | 45 # we read the options from the default: commandArgs(TRUE). |
46 spec <- matrix(c( | 46 spec <- matrix(c( |
47 "quiet", "q", 0, "logical", | 47 "quiet", "q", 0, "logical", |
48 "help", "h", 0, "logical", | 48 "help", "h", 0, "logical", |
49 "batch_factors", "", 1, "character", | |
49 "outfile", "o", 1, "character", | 50 "outfile", "o", 1, "character", |
50 "countsfile", "n", 1, "character", | 51 "countsfile", "n", 1, "character", |
51 "header", "H", 0, "logical", | 52 "header", "H", 0, "logical", |
52 "factors", "f", 1, "character", | 53 "factors", "f", 1, "character", |
53 "files_to_labels", "l", 1, "character", | 54 "files_to_labels", "l", 1, "character", |
85 TRUE | 86 TRUE |
86 } else { | 87 } else { |
87 FALSE | 88 FALSE |
88 } | 89 } |
89 | 90 |
90 if (!is.null(opt$header)) { | 91 source_local <- function(fname){ |
91 hasHeader <- TRUE | 92 argv <- commandArgs(trailingOnly = FALSE) |
92 } else { | 93 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) |
93 hasHeader <- FALSE | 94 source(paste(base_dir, fname, sep="/")) |
94 } | 95 } |
95 | 96 |
96 if (!is.null(opt$tximport)) { | 97 source_local('get_deseq_dataset.R') |
97 if (is.null(opt$tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport") | |
98 if (tolower(file_ext(opt$tx2gene)) == "gtf") { | |
99 gtfFile <- opt$tx2gene | |
100 } else { | |
101 gtfFile <- NULL | |
102 tx2gene <- read.table(opt$tx2gene, header=FALSE) | |
103 } | |
104 useTXI <- TRUE | |
105 } else { | |
106 useTXI <- FALSE | |
107 } | |
108 | 98 |
109 suppressPackageStartupMessages({ | 99 suppressPackageStartupMessages({ |
110 library("DESeq2") | 100 library("DESeq2") |
111 library("RColorBrewer") | 101 library("RColorBrewer") |
112 library("gplots") | 102 library("gplots") |
195 cat(paste("other factors in design:",paste(factors[-length(factors)],collapse=","),"\n")) | 185 cat(paste("other factors in design:",paste(factors[-length(factors)],collapse=","),"\n")) |
196 } | 186 } |
197 cat("\n---------------------\n") | 187 cat("\n---------------------\n") |
198 } | 188 } |
199 | 189 |
200 # For JSON input from Galaxy, path is absolute | 190 dds <- get_deseq_dataset(sampleTable, header=opt$header, designFormula=designFormula, tximport=opt$tximport, txtype=opt$txtype, tx2gene=opt$tx2gene) |
201 dir <- "" | 191 |
202 | 192 apply_batch_factors <- function (dds, batch_factors) { |
203 if (!useTXI & hasHeader) { | 193 rownames(batch_factors) <- batch_factors$identifier |
204 countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)}) | 194 batch_factors <- subset(batch_factors, select = -c(identifier, condition)) |
205 tbl <- do.call("cbind", countfiles) | 195 dds_samples <- colnames(dds) |
206 colnames(tbl) <- rownames(sampleTable) # take sample ids from header | 196 batch_samples <- rownames(batch_factors) |
207 | 197 if (!setequal(batch_samples, dds_samples)) { |
208 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) | 198 stop("Batch factor names don't correspond to input sample names, check input files") |
209 oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual", | 199 } |
210 "not_aligned", "alignment_not_unique") | 200 dds_data <- colData(dds) |
211 specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames | 201 # Merge dds_data with batch_factors using indexes, which are sample names |
212 tbl <- tbl[!specialRows, , drop = FALSE] | 202 # Set sort to False, which maintains the order in dds_data |
213 | 203 reordered_batch <- merge(dds_data, batch_factors, by.x = 0, by.y = 0, sort=F) |
214 dds <- DESeqDataSetFromMatrix(countData = tbl, | 204 batch_factors <- reordered_batch[, ncol(dds_data):ncol(reordered_batch)] |
215 colData = sampleTable[,-c(1:2), drop=FALSE], | 205 for (factor in colnames(batch_factors)) { |
216 design = designFormula) | 206 dds[[factor]] <- batch_factors[[factor]] |
217 } else if (!useTXI & !hasHeader) { | 207 } |
218 | 208 colnames(dds) <- reordered_batch[,1] |
219 # construct the object from HTSeq files | 209 return(dds) |
220 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, | 210 } |
221 directory = dir, | 211 |
222 design = designFormula) | 212 if (!is.null(opt$batch_factors)) { |
223 labs <- unname(unlist(filenames_to_labels[colnames(dds)])) | 213 batch_factors <- read.table(opt$batch_factors, sep="\t", header=T) |
224 colnames(dds) <- labs | 214 dds <- apply_batch_factors(dds = dds, batch_factors = batch_factors) |
225 | 215 batch_design <- colnames(batch_factors)[-c(1,2)] |
226 } else { | 216 designFormula <- as.formula(paste("~", paste(c(batch_design, rev(factors)), collapse=" + "))) |
227 # construct the object using tximport | 217 design(dds) <- designFormula |
228 # first need to make the tx2gene table | |
229 # this takes ~2-3 minutes using Bioconductor functions | |
230 if (!is.null(gtfFile)) { | |
231 suppressPackageStartupMessages({ | |
232 library("GenomicFeatures") | |
233 }) | |
234 txdb <- makeTxDbFromGFF(gtfFile, format="gtf") | |
235 k <- keys(txdb, keytype = "GENEID") | |
236 df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME") | |
237 tx2gene <- df[, 2:1] # tx ID, then gene ID | |
238 } | |
239 library("tximport") | |
240 txiFiles <- as.character(sampleTable[,2]) | |
241 labs <- unname(unlist(filenames_to_labels[sampleTable[,1]])) | |
242 names(txiFiles) <- labs | |
243 txi <- tximport(txiFiles, type=opt$txtype, tx2gene=tx2gene) | |
244 dds <- DESeqDataSetFromTximport(txi, | |
245 sampleTable[,3:ncol(sampleTable),drop=FALSE], | |
246 designFormula) | |
247 } | 218 } |
248 | 219 |
249 if (verbose) { | 220 if (verbose) { |
250 cat("DESeq2 run information\n\n") | 221 cat("DESeq2 run information\n\n") |
251 cat("sample table:\n") | 222 cat("sample table:\n") |