Mercurial > repos > iuc > deseq2
comparison deseq2.R @ 15:9a616afdbda5 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 83eb5b2665d87c02b270596f8175499e69061032
author | iuc |
---|---|
date | Sat, 19 May 2018 03:55:48 -0400 |
parents | d0c39b5e78cf |
children | a416957ee305 |
comparison
equal
deleted
inserted
replaced
14:d0c39b5e78cf | 15:9a616afdbda5 |
---|---|
6 # This argument is required: | 6 # This argument is required: |
7 # | 7 # |
8 # 'factors' a JSON list object from Galaxy | 8 # 'factors' a JSON list object from Galaxy |
9 # | 9 # |
10 # the output file has columns: | 10 # the output file has columns: |
11 # | 11 # |
12 # baseMean (mean normalized count) | 12 # baseMean (mean normalized count) |
13 # log2FoldChange (by default a moderated LFC estimate) | 13 # log2FoldChange (by default a moderated LFC estimate) |
14 # lfcSE (the standard error) | 14 # lfcSE (the standard error) |
15 # stat (the Wald statistic) | 15 # stat (the Wald statistic) |
16 # pvalue (p-value from comparison of Wald statistic to a standard Normal) | 16 # pvalue (p-value from comparison of Wald statistic to a standard Normal) |
17 # padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter) | 17 # padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter) |
18 # | 18 # |
19 # the first variable in 'factors' will be the primary factor. | 19 # the first variable in 'factors' will be the primary factor. |
20 # the levels of the primary factor are used in the order of appearance in factors. | 20 # the levels of the primary factor are used in the order of appearance in factors. |
21 # | 21 # |
22 # by default, levels in the order A,B,C produces a single comparison of B vs A, to a single file 'outfile' | 22 # by default, levels in the order A,B,C produces a single comparison of B vs A, to a single file 'outfile' |
23 # | 23 # |
46 spec <- matrix(c( | 46 spec <- matrix(c( |
47 "quiet", "q", 0, "logical", | 47 "quiet", "q", 0, "logical", |
48 "help", "h", 0, "logical", | 48 "help", "h", 0, "logical", |
49 "outfile", "o", 1, "character", | 49 "outfile", "o", 1, "character", |
50 "countsfile", "n", 1, "character", | 50 "countsfile", "n", 1, "character", |
51 "header", "H", 0, "logical", | |
51 "factors", "f", 1, "character", | 52 "factors", "f", 1, "character", |
52 "files_to_labels", "l", 1, "character", | 53 "files_to_labels", "l", 1, "character", |
53 "plots" , "p", 1, "character", | 54 "plots" , "p", 1, "character", |
54 "tximport", "i", 0, "logical", | 55 "tximport", "i", 0, "logical", |
55 "txtype", "y", 1, "character", | 56 "txtype", "y", 1, "character", |
82 | 83 |
83 verbose <- if (is.null(opt$quiet)) { | 84 verbose <- if (is.null(opt$quiet)) { |
84 TRUE | 85 TRUE |
85 } else { | 86 } else { |
86 FALSE | 87 FALSE |
88 } | |
89 | |
90 if (!is.null(opt$header)) { | |
91 hasHeader <- TRUE | |
92 } else { | |
93 hasHeader <- FALSE | |
87 } | 94 } |
88 | 95 |
89 if (!is.null(opt$tximport)) { | 96 if (!is.null(opt$tximport)) { |
90 if (is.null(opt$tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport") | 97 if (is.null(opt$tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport") |
91 if (tolower(file_ext(opt$tx2gene)) == "gtf") { | 98 if (tolower(file_ext(opt$tx2gene)) == "gtf") { |
136 rownames(sampleTable) <- labs | 143 rownames(sampleTable) <- labs |
137 | 144 |
138 primaryFactor <- factors[1] | 145 primaryFactor <- factors[1] |
139 designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + "))) | 146 designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + "))) |
140 | 147 |
141 if (verbose) { | |
142 cat("DESeq2 run information\n\n") | |
143 cat("sample table:\n") | |
144 print(sampleTable[,-c(1:2),drop=FALSE]) | |
145 cat("\ndesign formula:\n") | |
146 print(designFormula) | |
147 cat("\n\n") | |
148 } | |
149 | |
150 # these are plots which are made once for each analysis | 148 # these are plots which are made once for each analysis |
151 generateGenericPlots <- function(dds, factors) { | 149 generateGenericPlots <- function(dds, factors) { |
152 library("ggplot2") | 150 library("ggplot2") |
153 library("ggrepel") | 151 library("ggrepel") |
154 library("pheatmap") | 152 library("pheatmap") |
200 } | 198 } |
201 | 199 |
202 # For JSON input from Galaxy, path is absolute | 200 # For JSON input from Galaxy, path is absolute |
203 dir <- "" | 201 dir <- "" |
204 | 202 |
205 if (!useTXI) { | 203 if (!useTXI & hasHeader) { |
204 countfiles <- lapply(as.character(sampleTable$filename), function(x){read.delim(x, row.names=1)}) | |
205 tbl <- do.call("cbind", countfiles) | |
206 rownames(sampleTable) <- colnames(tbl) # take sample ids from header | |
207 | |
208 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) | |
209 oldSpecialNames <- c("no_feature", "ambiguous", "too_low_aQual", | |
210 "not_aligned", "alignment_not_unique") | |
211 specialRows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% oldSpecialNames | |
212 tbl <- tbl[!specialRows, , drop = FALSE] | |
213 | |
214 dds <- DESeqDataSetFromMatrix(countData = tbl, | |
215 colData = sampleTable[,-c(1:2), drop=FALSE], | |
216 design = designFormula) | |
217 } else if (!useTXI & !hasHeader) { | |
218 | |
206 # construct the object from HTSeq files | 219 # construct the object from HTSeq files |
207 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, | 220 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, |
208 directory = dir, | 221 directory = dir, |
209 design = designFormula) | 222 design = designFormula) |
210 labs <- unname(unlist(filenames_to_labels[colnames(dds)])) | 223 labs <- unname(unlist(filenames_to_labels[colnames(dds)])) |
211 colnames(dds) <- labs | 224 colnames(dds) <- labs |
225 | |
212 } else { | 226 } else { |
213 # construct the object using tximport | 227 # construct the object using tximport |
214 # first need to make the tx2gene table | 228 # first need to make the tx2gene table |
215 # this takes ~2-3 minutes using Bioconductor functions | 229 # this takes ~2-3 minutes using Bioconductor functions |
216 if (!is.null(gtfFile)) { | 230 if (!is.null(gtfFile)) { |
230 dds <- DESeqDataSetFromTximport(txi, | 244 dds <- DESeqDataSetFromTximport(txi, |
231 sampleTable[,3:ncol(sampleTable),drop=FALSE], | 245 sampleTable[,3:ncol(sampleTable),drop=FALSE], |
232 designFormula) | 246 designFormula) |
233 } | 247 } |
234 | 248 |
235 if (verbose) cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n")) | 249 if (verbose) { |
250 cat("DESeq2 run information\n\n") | |
251 cat("sample table:\n") | |
252 print(sampleTable[,-c(1:2),drop=FALSE]) | |
253 cat("\ndesign formula:\n") | |
254 print(designFormula) | |
255 cat("\n\n") | |
256 cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n")) | |
257 } | |
258 | |
236 # optional outlier behavior | 259 # optional outlier behavior |
237 if (is.null(opt$outlier_replace_off)) { | 260 if (is.null(opt$outlier_replace_off)) { |
238 minRep <- 7 | 261 minRep <- 7 |
239 } else { | 262 } else { |
240 minRep <- Inf | 263 minRep <- Inf |
241 if (verbose) cat("outlier replacement off\n") | 264 if (verbose) cat("outlier replacement off\n") |
242 } | 265 } |
243 if (is.null(opt$outlier_filter_off)) { | 266 if (is.null(opt$outlier_filter_off)) { |
244 cooksCutoff <- TRUE | 267 cooksCutoff <- TRUE |
245 } else { | 268 } else { |
246 cooksCutoff <- FALSE | 269 cooksCutoff <- FALSE |
247 if (verbose) cat("outlier filtering off\n") | 270 if (verbose) cat("outlier filtering off\n") |
248 } | 271 } |
249 | 272 |
250 # optional automatic mean filtering | 273 # optional automatic mean filtering |