Mercurial > repos > iuc > deseq2
comparison deseq2.R @ 14:d0c39b5e78cf draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit e811a7887db870f4f94f620f52bce656c8d5ba23
author | iuc |
---|---|
date | Thu, 12 Apr 2018 17:29:45 -0400 |
parents | bd06df00180a |
children | 9a616afdbda5 |
comparison
equal
deleted
inserted
replaced
13:3660c9088494 | 14:d0c39b5e78cf |
---|---|
1 #!/usr/bin/env Rscript | 1 #!/usr/bin/env Rscript |
2 | 2 |
3 # A command-line interface to DESeq2 for use with Galaxy | 3 # A command-line interface to DESeq2 for use with Galaxy |
4 # written by Bjoern Gruening and modified by Michael Love 2016.03.30 | 4 # written by Bjoern Gruening and modified by Michael Love 2016.03.30 |
5 # | 5 # |
6 # one of these arguments is required: | 6 # This argument is required: |
7 # | 7 # |
8 # 'factors' a JSON list object from Galaxy | 8 # 'factors' a JSON list object from Galaxy |
9 # | |
10 # 'sample_table' is a sample table as described in ?DESeqDataSetFromHTSeqCount | |
11 # with columns: sample name, filename, then factors (variables) | |
12 # | 9 # |
13 # the output file has columns: | 10 # the output file has columns: |
14 # | 11 # |
15 # baseMean (mean normalized count) | 12 # baseMean (mean normalized count) |
16 # log2FoldChange (by default a moderated LFC estimate) | 13 # log2FoldChange (by default a moderated LFC estimate) |
17 # lfcSE (the standard error) | 14 # lfcSE (the standard error) |
18 # stat (the Wald statistic) | 15 # stat (the Wald statistic) |
19 # pvalue (p-value from comparison of Wald statistic to a standard Normal) | 16 # pvalue (p-value from comparison of Wald statistic to a standard Normal) |
20 # padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter) | 17 # padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter) |
21 # | 18 # |
22 # the first variable in 'factors' and first column in 'sample_table' will be the primary factor. | 19 # the first variable in 'factors' will be the primary factor. |
23 # the levels of the primary factor are used in the order of appearance in factors or in sample_table. | 20 # the levels of the primary factor are used in the order of appearance in factors. |
24 # | 21 # |
25 # by default, levels in the order A,B,C produces a single comparison of B vs A, to a single file 'outfile' | 22 # by default, levels in the order A,B,C produces a single comparison of B vs A, to a single file 'outfile' |
26 # | 23 # |
27 # for the 'many_contrasts' flag, levels in the order A,B,C produces comparisons C vs A, B vs A, C vs B, | 24 # for the 'many_contrasts' flag, levels in the order A,B,C produces comparisons C vs A, B vs A, C vs B, |
28 # to a number of files using the 'outfile' prefix: 'outfile.condition_C_vs_A' etc. | 25 # to a number of files using the 'outfile' prefix: 'outfile.condition_C_vs_A' etc. |
52 "outfile", "o", 1, "character", | 49 "outfile", "o", 1, "character", |
53 "countsfile", "n", 1, "character", | 50 "countsfile", "n", 1, "character", |
54 "factors", "f", 1, "character", | 51 "factors", "f", 1, "character", |
55 "files_to_labels", "l", 1, "character", | 52 "files_to_labels", "l", 1, "character", |
56 "plots" , "p", 1, "character", | 53 "plots" , "p", 1, "character", |
57 "sample_table", "s", 1, "character", | |
58 "tximport", "i", 0, "logical", | 54 "tximport", "i", 0, "logical", |
59 "txtype", "y", 1, "character", | 55 "txtype", "y", 1, "character", |
60 "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF) | 56 "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF) |
61 "fit_type", "t", 1, "integer", | 57 "fit_type", "t", 1, "integer", |
62 "many_contrasts", "m", 0, "logical", | 58 "many_contrasts", "m", 0, "logical", |
77 # enforce the following required arguments | 73 # enforce the following required arguments |
78 if (is.null(opt$outfile)) { | 74 if (is.null(opt$outfile)) { |
79 cat("'outfile' is required\n") | 75 cat("'outfile' is required\n") |
80 q(status=1) | 76 q(status=1) |
81 } | 77 } |
82 if (is.null(opt$sample_table) & is.null(opt$factors)) { | 78 if (is.null(opt$factors)) { |
83 cat("'factors' or 'sample_table' is required\n") | 79 cat("'factors' is required\n") |
84 q(status=1) | 80 q(status=1) |
85 } | 81 } |
86 | 82 |
87 verbose <- if (is.null(opt$quiet)) { | 83 verbose <- if (is.null(opt$quiet)) { |
88 TRUE | 84 TRUE |
112 # build or read sample table | 108 # build or read sample table |
113 | 109 |
114 trim <- function (x) gsub("^\\s+|\\s+$", "", x) | 110 trim <- function (x) gsub("^\\s+|\\s+$", "", x) |
115 | 111 |
116 # switch on if 'factors' was provided: | 112 # switch on if 'factors' was provided: |
117 if (!is.null(opt$factors)) { | 113 library("rjson") |
118 library("rjson") | 114 parser <- newJSONParser() |
119 parser <- newJSONParser() | 115 parser$addData(opt$factors) |
120 parser$addData(opt$factors) | 116 factorList <- parser$getObject() |
121 factorList <- parser$getObject() | 117 filenames_to_labels <- fromJSON(opt$files_to_labels) |
122 filenames_to_labels <- fromJSON(opt$files_to_labels) | 118 factors <- sapply(factorList, function(x) x[[1]]) |
123 factors <- sapply(factorList, function(x) x[[1]]) | 119 primaryFactor <- factors[1] |
124 primaryFactor <- factors[1] | 120 filenamesIn <- unname(unlist(factorList[[1]][[2]])) |
125 filenamesIn <- unname(unlist(factorList[[1]][[2]])) | 121 labs = unname(unlist(filenames_to_labels[basename(filenamesIn)])) |
126 labs = unname(unlist(filenames_to_labels[basename(filenamesIn)])) | 122 sampleTable <- data.frame(sample=basename(filenamesIn), |
127 sampleTable <- data.frame(sample=basename(filenamesIn), | 123 filename=filenamesIn, |
128 filename=filenamesIn, | 124 row.names=filenamesIn, |
129 row.names=filenamesIn, | 125 stringsAsFactors=FALSE) |
130 stringsAsFactors=FALSE) | 126 for (factor in factorList) { |
131 for (factor in factorList) { | 127 factorName <- trim(factor[[1]]) |
132 factorName <- trim(factor[[1]]) | 128 sampleTable[[factorName]] <- character(nrow(sampleTable)) |
133 sampleTable[[factorName]] <- character(nrow(sampleTable)) | 129 lvls <- sapply(factor[[2]], function(x) names(x)) |
134 lvls <- sapply(factor[[2]], function(x) names(x)) | 130 for (i in seq_along(factor[[2]])) { |
135 for (i in seq_along(factor[[2]])) { | 131 files <- factor[[2]][[i]][[1]] |
136 files <- factor[[2]][[i]][[1]] | 132 sampleTable[files,factorName] <- trim(lvls[i]) |
137 sampleTable[files,factorName] <- trim(lvls[i]) | 133 } |
138 } | 134 sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls) |
139 sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls) | 135 } |
140 } | 136 rownames(sampleTable) <- labs |
141 rownames(sampleTable) <- labs | |
142 } else { | |
143 # read the sample_table argument | |
144 # this table is described in ?DESeqDataSet | |
145 # one column for the sample name, one for the filename, and | |
146 # the remaining columns for factors in the analysis | |
147 sampleTable <- read.delim(opt$sample_table, stringsAsFactors=FALSE) | |
148 factors <- colnames(sampleTable)[-c(1:2)] | |
149 for (factor in factors) { | |
150 lvls <- unique(as.character(sampleTable[[factor]])) | |
151 sampleTable[[factor]] <- factor(sampleTable[[factor]], levels=lvls) | |
152 } | |
153 } | |
154 | 137 |
155 primaryFactor <- factors[1] | 138 primaryFactor <- factors[1] |
156 designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + "))) | 139 designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + "))) |
157 | 140 |
158 if (verbose) { | 141 if (verbose) { |
214 cat(paste("other factors in design:",paste(factors[-length(factors)],collapse=","),"\n")) | 197 cat(paste("other factors in design:",paste(factors[-length(factors)],collapse=","),"\n")) |
215 } | 198 } |
216 cat("\n---------------------\n") | 199 cat("\n---------------------\n") |
217 } | 200 } |
218 | 201 |
219 # if JSON input from Galaxy, path is absolute | 202 # For JSON input from Galaxy, path is absolute |
220 # otherwise, from sample_table, assume it is relative | 203 dir <- "" |
221 dir <- if (is.null(opt$factors)) { | |
222 "." | |
223 } else { | |
224 "" | |
225 } | |
226 | 204 |
227 if (!useTXI) { | 205 if (!useTXI) { |
228 # construct the object from HTSeq files | 206 # construct the object from HTSeq files |
229 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, | 207 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, |
230 directory = dir, | 208 directory = dir, |