comparison deseq2.R @ 14:d0c39b5e78cf draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit e811a7887db870f4f94f620f52bce656c8d5ba23
author iuc
date Thu, 12 Apr 2018 17:29:45 -0400
parents bd06df00180a
children 9a616afdbda5
comparison
equal deleted inserted replaced
13:3660c9088494 14:d0c39b5e78cf
1 #!/usr/bin/env Rscript 1 #!/usr/bin/env Rscript
2 2
3 # A command-line interface to DESeq2 for use with Galaxy 3 # A command-line interface to DESeq2 for use with Galaxy
4 # written by Bjoern Gruening and modified by Michael Love 2016.03.30 4 # written by Bjoern Gruening and modified by Michael Love 2016.03.30
5 # 5 #
6 # one of these arguments is required: 6 # This argument is required:
7 # 7 #
8 # 'factors' a JSON list object from Galaxy 8 # 'factors' a JSON list object from Galaxy
9 #
10 # 'sample_table' is a sample table as described in ?DESeqDataSetFromHTSeqCount
11 # with columns: sample name, filename, then factors (variables)
12 # 9 #
13 # the output file has columns: 10 # the output file has columns:
14 # 11 #
15 # baseMean (mean normalized count) 12 # baseMean (mean normalized count)
16 # log2FoldChange (by default a moderated LFC estimate) 13 # log2FoldChange (by default a moderated LFC estimate)
17 # lfcSE (the standard error) 14 # lfcSE (the standard error)
18 # stat (the Wald statistic) 15 # stat (the Wald statistic)
19 # pvalue (p-value from comparison of Wald statistic to a standard Normal) 16 # pvalue (p-value from comparison of Wald statistic to a standard Normal)
20 # padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter) 17 # padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter)
21 # 18 #
22 # the first variable in 'factors' and first column in 'sample_table' will be the primary factor. 19 # the first variable in 'factors' will be the primary factor.
23 # the levels of the primary factor are used in the order of appearance in factors or in sample_table. 20 # the levels of the primary factor are used in the order of appearance in factors.
24 # 21 #
25 # by default, levels in the order A,B,C produces a single comparison of B vs A, to a single file 'outfile' 22 # by default, levels in the order A,B,C produces a single comparison of B vs A, to a single file 'outfile'
26 # 23 #
27 # for the 'many_contrasts' flag, levels in the order A,B,C produces comparisons C vs A, B vs A, C vs B, 24 # for the 'many_contrasts' flag, levels in the order A,B,C produces comparisons C vs A, B vs A, C vs B,
28 # to a number of files using the 'outfile' prefix: 'outfile.condition_C_vs_A' etc. 25 # to a number of files using the 'outfile' prefix: 'outfile.condition_C_vs_A' etc.
52 "outfile", "o", 1, "character", 49 "outfile", "o", 1, "character",
53 "countsfile", "n", 1, "character", 50 "countsfile", "n", 1, "character",
54 "factors", "f", 1, "character", 51 "factors", "f", 1, "character",
55 "files_to_labels", "l", 1, "character", 52 "files_to_labels", "l", 1, "character",
56 "plots" , "p", 1, "character", 53 "plots" , "p", 1, "character",
57 "sample_table", "s", 1, "character",
58 "tximport", "i", 0, "logical", 54 "tximport", "i", 0, "logical",
59 "txtype", "y", 1, "character", 55 "txtype", "y", 1, "character",
60 "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF) 56 "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF)
61 "fit_type", "t", 1, "integer", 57 "fit_type", "t", 1, "integer",
62 "many_contrasts", "m", 0, "logical", 58 "many_contrasts", "m", 0, "logical",
77 # enforce the following required arguments 73 # enforce the following required arguments
78 if (is.null(opt$outfile)) { 74 if (is.null(opt$outfile)) {
79 cat("'outfile' is required\n") 75 cat("'outfile' is required\n")
80 q(status=1) 76 q(status=1)
81 } 77 }
82 if (is.null(opt$sample_table) & is.null(opt$factors)) { 78 if (is.null(opt$factors)) {
83 cat("'factors' or 'sample_table' is required\n") 79 cat("'factors' is required\n")
84 q(status=1) 80 q(status=1)
85 } 81 }
86 82
87 verbose <- if (is.null(opt$quiet)) { 83 verbose <- if (is.null(opt$quiet)) {
88 TRUE 84 TRUE
112 # build or read sample table 108 # build or read sample table
113 109
114 trim <- function (x) gsub("^\\s+|\\s+$", "", x) 110 trim <- function (x) gsub("^\\s+|\\s+$", "", x)
115 111
116 # switch on if 'factors' was provided: 112 # switch on if 'factors' was provided:
117 if (!is.null(opt$factors)) { 113 library("rjson")
118 library("rjson") 114 parser <- newJSONParser()
119 parser <- newJSONParser() 115 parser$addData(opt$factors)
120 parser$addData(opt$factors) 116 factorList <- parser$getObject()
121 factorList <- parser$getObject() 117 filenames_to_labels <- fromJSON(opt$files_to_labels)
122 filenames_to_labels <- fromJSON(opt$files_to_labels) 118 factors <- sapply(factorList, function(x) x[[1]])
123 factors <- sapply(factorList, function(x) x[[1]]) 119 primaryFactor <- factors[1]
124 primaryFactor <- factors[1] 120 filenamesIn <- unname(unlist(factorList[[1]][[2]]))
125 filenamesIn <- unname(unlist(factorList[[1]][[2]])) 121 labs = unname(unlist(filenames_to_labels[basename(filenamesIn)]))
126 labs = unname(unlist(filenames_to_labels[basename(filenamesIn)])) 122 sampleTable <- data.frame(sample=basename(filenamesIn),
127 sampleTable <- data.frame(sample=basename(filenamesIn), 123 filename=filenamesIn,
128 filename=filenamesIn, 124 row.names=filenamesIn,
129 row.names=filenamesIn, 125 stringsAsFactors=FALSE)
130 stringsAsFactors=FALSE) 126 for (factor in factorList) {
131 for (factor in factorList) { 127 factorName <- trim(factor[[1]])
132 factorName <- trim(factor[[1]]) 128 sampleTable[[factorName]] <- character(nrow(sampleTable))
133 sampleTable[[factorName]] <- character(nrow(sampleTable)) 129 lvls <- sapply(factor[[2]], function(x) names(x))
134 lvls <- sapply(factor[[2]], function(x) names(x)) 130 for (i in seq_along(factor[[2]])) {
135 for (i in seq_along(factor[[2]])) { 131 files <- factor[[2]][[i]][[1]]
136 files <- factor[[2]][[i]][[1]] 132 sampleTable[files,factorName] <- trim(lvls[i])
137 sampleTable[files,factorName] <- trim(lvls[i]) 133 }
138 } 134 sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls)
139 sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls) 135 }
140 } 136 rownames(sampleTable) <- labs
141 rownames(sampleTable) <- labs
142 } else {
143 # read the sample_table argument
144 # this table is described in ?DESeqDataSet
145 # one column for the sample name, one for the filename, and
146 # the remaining columns for factors in the analysis
147 sampleTable <- read.delim(opt$sample_table, stringsAsFactors=FALSE)
148 factors <- colnames(sampleTable)[-c(1:2)]
149 for (factor in factors) {
150 lvls <- unique(as.character(sampleTable[[factor]]))
151 sampleTable[[factor]] <- factor(sampleTable[[factor]], levels=lvls)
152 }
153 }
154 137
155 primaryFactor <- factors[1] 138 primaryFactor <- factors[1]
156 designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + "))) 139 designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + ")))
157 140
158 if (verbose) { 141 if (verbose) {
214 cat(paste("other factors in design:",paste(factors[-length(factors)],collapse=","),"\n")) 197 cat(paste("other factors in design:",paste(factors[-length(factors)],collapse=","),"\n"))
215 } 198 }
216 cat("\n---------------------\n") 199 cat("\n---------------------\n")
217 } 200 }
218 201
219 # if JSON input from Galaxy, path is absolute 202 # For JSON input from Galaxy, path is absolute
220 # otherwise, from sample_table, assume it is relative 203 dir <- ""
221 dir <- if (is.null(opt$factors)) {
222 "."
223 } else {
224 ""
225 }
226 204
227 if (!useTXI) { 205 if (!useTXI) {
228 # construct the object from HTSeq files 206 # construct the object from HTSeq files
229 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, 207 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
230 directory = dir, 208 directory = dir,