comparison deseq2.R @ 15:9a616afdbda5 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 83eb5b2665d87c02b270596f8175499e69061032
author iuc
date Sat, 19 May 2018 03:55:48 -0400
parents d0c39b5e78cf
children a416957ee305
comparison
equal deleted inserted replaced
14:d0c39b5e78cf 15:9a616afdbda5
6 # This argument is required: 6 # This argument is required:
7 # 7 #
8 # 'factors' a JSON list object from Galaxy 8 # 'factors' a JSON list object from Galaxy
9 # 9 #
10 # the output file has columns: 10 # the output file has columns:
11 # 11 #
12 # baseMean (mean normalized count) 12 # baseMean (mean normalized count)
13 # log2FoldChange (by default a moderated LFC estimate) 13 # log2FoldChange (by default a moderated LFC estimate)
14 # lfcSE (the standard error) 14 # lfcSE (the standard error)
15 # stat (the Wald statistic) 15 # stat (the Wald statistic)
16 # pvalue (p-value from comparison of Wald statistic to a standard Normal) 16 # pvalue (p-value from comparison of Wald statistic to a standard Normal)
17 # padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter) 17 # padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter)
18 # 18 #
19 # the first variable in 'factors' will be the primary factor. 19 # the first variable in 'factors' will be the primary factor.
20 # the levels of the primary factor are used in the order of appearance in factors. 20 # the levels of the primary factor are used in the order of appearance in factors.
21 # 21 #
22 # by default, levels in the order A,B,C produces a single comparison of B vs A, to a single file 'outfile' 22 # by default, levels in the order A,B,C produces a single comparison of B vs A, to a single file 'outfile'
23 # 23 #
46 spec <- matrix(c( 46 spec <- matrix(c(
47 "quiet", "q", 0, "logical", 47 "quiet", "q", 0, "logical",
48 "help", "h", 0, "logical", 48 "help", "h", 0, "logical",
49 "outfile", "o", 1, "character", 49 "outfile", "o", 1, "character",
50 "countsfile", "n", 1, "character", 50 "countsfile", "n", 1, "character",
51 "header", "H", 0, "logical",
51 "factors", "f", 1, "character", 52 "factors", "f", 1, "character",
52 "files_to_labels", "l", 1, "character", 53 "files_to_labels", "l", 1, "character",
53 "plots" , "p", 1, "character", 54 "plots" , "p", 1, "character",
54 "tximport", "i", 0, "logical", 55 "tximport", "i", 0, "logical",
55 "txtype", "y", 1, "character", 56 "txtype", "y", 1, "character",
82 83
83 verbose <- if (is.null(opt$quiet)) { 84 verbose <- if (is.null(opt$quiet)) {
84 TRUE 85 TRUE
85 } else { 86 } else {
86 FALSE 87 FALSE
88 }
89
90 if (!is.null(opt$header)) {
91 hasHeader <- TRUE
92 } else {
93 hasHeader <- FALSE
87 } 94 }
88 95
89 if (!is.null(opt$tximport)) { 96 if (!is.null(opt$tximport)) {
90 if (is.null(opt$tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport") 97 if (is.null(opt$tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport")
91 if (tolower(file_ext(opt$tx2gene)) == "gtf") { 98 if (tolower(file_ext(opt$tx2gene)) == "gtf") {
136 rownames(sampleTable) <- labs 143 rownames(sampleTable) <- labs
137 144
138 primaryFactor <- factors[1] 145 primaryFactor <- factors[1]
139 designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + "))) 146 designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + ")))
140 147
141 if (verbose) {
142 cat("DESeq2 run information\n\n")
143 cat("sample table:\n")
144 print(sampleTable[,-c(1:2),drop=FALSE])
145 cat("\ndesign formula:\n")
146 print(designFormula)
147 cat("\n\n")
148 }
149
150 # these are plots which are made once for each analysis 148 # these are plots which are made once for each analysis
151 generateGenericPlots <- function(dds, factors) { 149 generateGenericPlots <- function(dds, factors) {
152 library("ggplot2") 150 library("ggplot2")
153 library("ggrepel") 151 library("ggrepel")
154 library("pheatmap") 152 library("pheatmap")
200 } 198 }
201 199
202 # For JSON input from Galaxy, path is absolute 200 # For JSON input from Galaxy, path is absolute
203 dir <- "" 201 dir <- ""
204 202
205 if (!useTXI) { 203 if (!useTXI & hasHeader) {
204 countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)})
205 tbl <- do.call("cbind", countfiles)
206 rownames(sampleTable) <- colnames(tbl) # take sample ids from header
207
208 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function)
209 oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual",
210 "not_aligned", "alignment_not_unique")
211 specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames
212 tbl <- tbl[!specialRows, , drop = FALSE]
213
214 dds <- DESeqDataSetFromMatrix(countData = tbl,
215 colData = sampleTable[,-c(1:2), drop=FALSE],
216 design = designFormula)
217 } else if (!useTXI & !hasHeader) {
218
206 # construct the object from HTSeq files 219 # construct the object from HTSeq files
207 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, 220 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
208 directory = dir, 221 directory = dir,
209 design = designFormula) 222 design = designFormula)
210 labs <- unname(unlist(filenames_to_labels[colnames(dds)])) 223 labs <- unname(unlist(filenames_to_labels[colnames(dds)]))
211 colnames(dds) <- labs 224 colnames(dds) <- labs
225
212 } else { 226 } else {
213 # construct the object using tximport 227 # construct the object using tximport
214 # first need to make the tx2gene table 228 # first need to make the tx2gene table
215 # this takes ~2-3 minutes using Bioconductor functions 229 # this takes ~2-3 minutes using Bioconductor functions
216 if (!is.null(gtfFile)) { 230 if (!is.null(gtfFile)) {
230 dds <- DESeqDataSetFromTximport(txi, 244 dds <- DESeqDataSetFromTximport(txi,
231 sampleTable[,3:ncol(sampleTable),drop=FALSE], 245 sampleTable[,3:ncol(sampleTable),drop=FALSE],
232 designFormula) 246 designFormula)
233 } 247 }
234 248
235 if (verbose) cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n")) 249 if (verbose) {
250 cat("DESeq2 run information\n\n")
251 cat("sample table:\n")
252 print(sampleTable[,-c(1:2),drop=FALSE])
253 cat("\ndesign formula:\n")
254 print(designFormula)
255 cat("\n\n")
256 cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n"))
257 }
258
236 # optional outlier behavior 259 # optional outlier behavior
237 if (is.null(opt$outlier_replace_off)) { 260 if (is.null(opt$outlier_replace_off)) {
238 minRep <- 7 261 minRep <- 7
239 } else { 262 } else {
240 minRep <- Inf 263 minRep <- Inf
241 if (verbose) cat("outlier replacement off\n") 264 if (verbose) cat("outlier replacement off\n")
242 } 265 }
243 if (is.null(opt$outlier_filter_off)) { 266 if (is.null(opt$outlier_filter_off)) {
244 cooksCutoff <- TRUE 267 cooksCutoff <- TRUE
245 } else { 268 } else {
246 cooksCutoff <- FALSE 269 cooksCutoff <- FALSE
247 if (verbose) cat("outlier filtering off\n") 270 if (verbose) cat("outlier filtering off\n")
248 } 271 }
249 272
250 # optional automatic mean filtering 273 # optional automatic mean filtering