Mercurial > repos > iuc > deseq2
comparison deseq2.R @ 25:de44f8eff84a draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
author | iuc |
---|---|
date | Wed, 25 Nov 2020 18:36:55 +0000 |
parents | 71bacea10eee |
children | d027d1f4984e |
comparison
equal
deleted
inserted
replaced
24:71bacea10eee | 25:de44f8eff84a |
---|---|
29 # 1 "parametric" | 29 # 1 "parametric" |
30 # 2 "local" | 30 # 2 "local" |
31 # 3 "mean" | 31 # 3 "mean" |
32 | 32 |
33 # setup R error handling to go to stderr | 33 # setup R error handling to go to stderr |
34 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | 34 options(show.error.messages = F, error = function() { |
35 cat(geterrmessage(), file = stderr()); q("no", 1, F) | |
36 }) | |
35 | 37 |
36 # we need that to not crash galaxy with an UTF8 error on German LC settings. | 38 # we need that to not crash galaxy with an UTF8 error on German LC settings. |
37 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 39 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
38 | 40 |
39 library("getopt") | 41 library("getopt") |
53 "rlogfile", "r", 1, "character", | 55 "rlogfile", "r", 1, "character", |
54 "vstfile", "v", 1, "character", | 56 "vstfile", "v", 1, "character", |
55 "header", "H", 0, "logical", | 57 "header", "H", 0, "logical", |
56 "factors", "f", 1, "character", | 58 "factors", "f", 1, "character", |
57 "files_to_labels", "l", 1, "character", | 59 "files_to_labels", "l", 1, "character", |
58 "plots" , "p", 1, "character", | 60 "plots", "p", 1, "character", |
59 "tximport", "i", 0, "logical", | 61 "tximport", "i", 0, "logical", |
60 "txtype", "y", 1, "character", | 62 "txtype", "y", 1, "character", |
61 "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF/GFF3 file | 63 "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF/GFF3 file |
62 "esf", "e", 1, "character", | 64 "esf", "e", 1, "character", |
63 "fit_type", "t", 1, "integer", | 65 "fit_type", "t", 1, "integer", |
64 "many_contrasts", "m", 0, "logical", | 66 "many_contrasts", "m", 0, "logical", |
65 "outlier_replace_off" , "a", 0, "logical", | 67 "outlier_replace_off", "a", 0, "logical", |
66 "outlier_filter_off" , "b", 0, "logical", | 68 "outlier_filter_off", "b", 0, "logical", |
67 "auto_mean_filter_off", "c", 0, "logical", | 69 "auto_mean_filter_off", "c", 0, "logical", |
68 "beta_prior_off", "d", 0, "logical"), | 70 "beta_prior_off", "d", 0, "logical" |
69 byrow=TRUE, ncol=4) | 71 ), byrow = TRUE, ncol = 4) |
70 opt <- getopt(spec) | 72 opt <- getopt(spec) |
71 | 73 |
72 # if help was asked for print a friendly message | 74 # if help was asked for print a friendly message |
73 # and exit with a non-zero error code | 75 # and exit with a non-zero error code |
74 if (!is.null(opt$help)) { | 76 if (!is.null(opt$help)) { |
75 cat(getopt(spec, usage=TRUE)) | 77 cat(getopt(spec, usage = TRUE)) |
76 q(status=1) | 78 q(status = 1) |
77 } | 79 } |
78 | 80 |
79 # enforce the following required arguments | 81 # enforce the following required arguments |
80 if (is.null(opt$outfile)) { | 82 if (is.null(opt$outfile)) { |
81 cat("'outfile' is required\n") | 83 cat("'outfile' is required\n") |
82 q(status=1) | 84 q(status = 1) |
83 } | 85 } |
84 if (is.null(opt$factors)) { | 86 if (is.null(opt$factors)) { |
85 cat("'factors' is required\n") | 87 cat("'factors' is required\n") |
86 q(status=1) | 88 q(status = 1) |
87 } | 89 } |
88 | 90 |
89 verbose <- if (is.null(opt$quiet)) { | 91 verbose <- is.null(opt$quiet) |
90 TRUE | 92 |
91 } else { | 93 source_local <- function(fname) { |
92 FALSE | |
93 } | |
94 | |
95 source_local <- function(fname){ | |
96 argv <- commandArgs(trailingOnly = FALSE) | 94 argv <- commandArgs(trailingOnly = FALSE) |
97 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) | 95 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) |
98 source(paste(base_dir, fname, sep="/")) | 96 source(paste(base_dir, fname, sep = "/")) |
99 } | 97 } |
100 | 98 |
101 source_local('get_deseq_dataset.R') | 99 source_local("get_deseq_dataset.R") |
102 | 100 |
103 suppressPackageStartupMessages({ | 101 suppressPackageStartupMessages({ |
104 library("DESeq2") | 102 library("DESeq2") |
105 library("RColorBrewer") | 103 library("RColorBrewer") |
106 library("gplots") | 104 library("gplots") |
107 }) | 105 }) |
108 | 106 |
109 if (opt$cores > 1) { | 107 if (opt$cores > 1) { |
110 library("BiocParallel") | 108 library("BiocParallel") |
111 register(MulticoreParam(opt$cores)) | 109 register(MulticoreParam(opt$cores)) |
112 parallel = TRUE | 110 parallel <- TRUE |
113 } else { | 111 } else { |
114 parallel = FALSE | 112 parallel <- FALSE |
115 } | 113 } |
116 | 114 |
117 # build or read sample table | 115 # build or read sample table |
118 | 116 |
119 trim <- function (x) gsub("^\\s+|\\s+$", "", x) | 117 trim <- function(x) gsub("^\\s+|\\s+$", "", x) |
120 | 118 |
121 # switch on if 'factors' was provided: | 119 # switch on if 'factors' was provided: |
122 library("rjson") | 120 library("rjson") |
123 parser <- newJSONParser() | 121 parser <- newJSONParser() |
124 parser$addData(opt$factors) | 122 parser$addData(opt$factors) |
125 factorList <- parser$getObject() | 123 factor_list <- parser$getObject() |
126 filenames_to_labels <- fromJSON(opt$files_to_labels) | 124 filenames_to_labels <- fromJSON(opt$files_to_labels) |
127 factors <- sapply(factorList, function(x) x[[1]]) | 125 factors <- sapply(factor_list, function(x) x[[1]]) |
128 primaryFactor <- factors[1] | 126 primary_factor <- factors[1] |
129 filenamesIn <- unname(unlist(factorList[[1]][[2]])) | 127 filenames_in <- unname(unlist(factor_list[[1]][[2]])) |
130 labs = unname(unlist(filenames_to_labels[basename(filenamesIn)])) | 128 labs <- unname(unlist(filenames_to_labels[basename(filenames_in)])) |
131 sampleTable <- data.frame(sample=basename(filenamesIn), | 129 sample_table <- data.frame( |
132 filename=filenamesIn, | 130 sample = basename(filenames_in), |
133 row.names=filenamesIn, | 131 filename = filenames_in, |
134 stringsAsFactors=FALSE) | 132 row.names = filenames_in, |
135 for (factor in factorList) { | 133 stringsAsFactors = FALSE |
136 factorName <- trim(factor[[1]]) | 134 ) |
137 sampleTable[[factorName]] <- character(nrow(sampleTable)) | 135 for (factor in factor_list) { |
136 factor_name <- trim(factor[[1]]) | |
137 sample_table[[factor_name]] <- character(nrow(sample_table)) | |
138 lvls <- sapply(factor[[2]], function(x) names(x)) | 138 lvls <- sapply(factor[[2]], function(x) names(x)) |
139 for (i in seq_along(factor[[2]])) { | 139 for (i in seq_along(factor[[2]])) { |
140 files <- factor[[2]][[i]][[1]] | 140 files <- factor[[2]][[i]][[1]] |
141 sampleTable[files,factorName] <- trim(lvls[i]) | 141 sample_table[files, factor_name] <- trim(lvls[i]) |
142 } | 142 } |
143 sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls) | 143 sample_table[[factor_name]] <- factor(sample_table[[factor_name]], levels = lvls) |
144 } | 144 } |
145 rownames(sampleTable) <- labs | 145 rownames(sample_table) <- labs |
146 | 146 |
147 primaryFactor <- factors[1] | 147 design_formula <- as.formula(paste("~", paste(rev(factors), collapse = " + "))) |
148 designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + "))) | |
149 | 148 |
150 # these are plots which are made once for each analysis | 149 # these are plots which are made once for each analysis |
151 generateGenericPlots <- function(dds, factors) { | 150 generate_generic_plots <- function(dds, factors) { |
152 library("ggplot2") | 151 library("ggplot2") |
153 library("ggrepel") | 152 library("ggrepel") |
154 library("pheatmap") | 153 library("pheatmap") |
155 | 154 |
156 rld <- rlog(dds) | 155 rld <- rlog(dds) |
157 p <- plotPCA(rld, intgroup=rev(factors)) | 156 p <- plotPCA(rld, intgroup = rev(factors)) |
158 print(p + geom_text_repel(aes_string(x = "PC1", y = "PC2", label = factor(colnames(dds))), size=3) + geom_point()) | 157 print(p + geom_text_repel(aes_string(x = "PC1", y = "PC2", label = factor(colnames(dds))), size = 3) + geom_point()) |
159 dat <- assay(rld) | 158 dat <- assay(rld) |
160 distsRL <- dist(t(dat)) | 159 dists_rl <- dist(t(dat)) |
161 mat <- as.matrix(distsRL) | 160 mat <- as.matrix(dists_rl) |
162 colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) | 161 colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255) |
163 pheatmap(mat, | 162 pheatmap( |
164 clustering_distance_rows=distsRL, | 163 mat, |
165 clustering_distance_cols=distsRL, | 164 clustering_distance_rows = dists_rl, |
166 col=colors, | 165 clustering_distance_cols = dists_rl, |
167 main="Sample-to-sample distances") | 166 col = colors, |
168 plotDispEsts(dds, main="Dispersion estimates") | 167 main = "Sample-to-sample distances" |
168 ) | |
169 plotDispEsts(dds, main = "Dispersion estimates") | |
169 } | 170 } |
170 | 171 |
171 # these are plots which can be made for each comparison, e.g. | 172 # these are plots which can be made for each comparison, e.g. |
172 # once for C vs A and once for B vs A | 173 # once for C vs A and once for B vs A |
173 generateSpecificPlots <- function(res, threshold, title_suffix) { | 174 generate_specific_plots <- function(res, threshold, title_suffix) { |
174 use <- res$baseMean > threshold | 175 use <- res$baseMean > threshold |
175 if (sum(!use) == 0) { | 176 if (sum(!use) == 0) { |
176 h <- hist(res$pvalue, breaks=0:50/50, plot=FALSE) | 177 h <- hist(res$pvalue, breaks = 0:50 / 50, plot = FALSE) |
177 barplot(height = h$counts, | 178 barplot( |
178 col = "powderblue", space = 0, xlab="p-values", ylab="frequency", | 179 height = h$counts, |
179 main=paste("Histogram of p-values for",title_suffix)) | 180 col = "powderblue", |
180 text(x = c(0, length(h$counts)), y = 0, label=paste(c(0,1)), adj=c(0.5,1.7), xpd=NA) | 181 space = 0, |
182 xlab = "p-values", | |
183 ylab = "frequency", | |
184 main = paste("Histogram of p-values for", title_suffix) | |
185 ) | |
186 text(x = c(0, length(h$counts)), y = 0, label = paste(c(0, 1)), adj = c(0.5, 1.7), xpd = NA) | |
181 } else { | 187 } else { |
182 h1 <- hist(res$pvalue[!use], breaks=0:50/50, plot=FALSE) | 188 h1 <- hist(res$pvalue[!use], breaks = 0:50 / 50, plot = FALSE) |
183 h2 <- hist(res$pvalue[use], breaks=0:50/50, plot=FALSE) | 189 h2 <- hist(res$pvalue[use], breaks = 0:50 / 50, plot = FALSE) |
184 colori <- c("filtered (low count)"="khaki", "not filtered"="powderblue") | 190 colori <- c("filtered (low count)" = "khaki", "not filtered" = "powderblue") |
185 barplot(height = rbind(h1$counts, h2$counts), beside = FALSE, | 191 barplot( |
186 col = colori, space = 0, xlab="p-values", ylab="frequency", | 192 height = rbind(h1$counts, h2$counts), |
187 main=paste("Histogram of p-values for",title_suffix)) | 193 beside = FALSE, |
188 text(x = c(0, length(h1$counts)), y = 0, label=paste(c(0,1)), adj=c(0.5,1.7), xpd=NA) | 194 col = colori, |
189 legend("topright", fill=rev(colori), legend=rev(names(colori)), bg="white") | 195 space = 0, |
190 } | 196 xlab = "p-values", |
191 plotMA(res, main= paste("MA-plot for",title_suffix), ylim=range(res$log2FoldChange, na.rm=TRUE)) | 197 ylab = "frequency", |
198 main = paste("Histogram of p-values for", title_suffix) | |
199 ) | |
200 text(x = c(0, length(h1$counts)), y = 0, label = paste(c(0, 1)), adj = c(0.5, 1.7), xpd = NA) | |
201 legend("topright", fill = rev(colori), legend = rev(names(colori)), bg = "white") | |
202 } | |
203 plotMA(res, main = paste("MA-plot for", title_suffix), ylim = range(res$log2FoldChange, na.rm = TRUE)) | |
192 } | 204 } |
193 | 205 |
194 if (verbose) { | 206 if (verbose) { |
195 cat(paste("primary factor:",primaryFactor,"\n")) | 207 cat(paste("primary factor:", primary_factor, "\n")) |
196 if (length(factors) > 1) { | 208 if (length(factors) > 1) { |
197 cat(paste("other factors in design:",paste(factors[-length(factors)],collapse=","),"\n")) | 209 cat(paste("other factors in design:", paste(factors[-length(factors)], collapse = ","), "\n")) |
198 } | 210 } |
199 cat("\n---------------------\n") | 211 cat("\n---------------------\n") |
200 } | 212 } |
201 | 213 |
202 dds <- get_deseq_dataset(sampleTable, header=opt$header, designFormula=designFormula, tximport=opt$tximport, txtype=opt$txtype, tx2gene=opt$tx2gene) | 214 dds <- get_deseq_dataset(sample_table, header = opt$header, design_formula = design_formula, tximport = opt$tximport, txtype = opt$txtype, tx2gene = opt$tx2gene) |
203 # estimate size factors for the chosen method | 215 # estimate size factors for the chosen method |
204 if(!is.null(opt$esf)){ | 216 if (!is.null(opt$esf)) { |
205 dds <- estimateSizeFactors(dds, type=opt$esf) | 217 dds <- estimateSizeFactors(dds, type = opt$esf) |
206 } | 218 } |
207 apply_batch_factors <- function (dds, batch_factors) { | 219 apply_batch_factors <- function(dds, batch_factors) { |
208 rownames(batch_factors) <- batch_factors$identifier | 220 rownames(batch_factors) <- batch_factors$identifier |
209 batch_factors <- subset(batch_factors, select = -c(identifier, condition)) | 221 batch_factors <- subset(batch_factors, select = -c(identifier, condition)) |
210 dds_samples <- colnames(dds) | 222 dds_samples <- colnames(dds) |
211 batch_samples <- rownames(batch_factors) | 223 batch_samples <- rownames(batch_factors) |
212 if (!setequal(batch_samples, dds_samples)) { | 224 if (!setequal(batch_samples, dds_samples)) { |
213 stop("Batch factor names don't correspond to input sample names, check input files") | 225 stop("Batch factor names don't correspond to input sample names, check input files") |
214 } | 226 } |
215 dds_data <- colData(dds) | 227 dds_data <- colData(dds) |
216 # Merge dds_data with batch_factors using indexes, which are sample names | 228 # Merge dds_data with batch_factors using indexes, which are sample names |
217 # Set sort to False, which maintains the order in dds_data | 229 # Set sort to False, which maintains the order in dds_data |
218 reordered_batch <- merge(dds_data, batch_factors, by.x = 0, by.y = 0, sort=F) | 230 reordered_batch <- merge(dds_data, batch_factors, by.x = 0, by.y = 0, sort = F) |
219 batch_factors <- reordered_batch[, ncol(dds_data):ncol(reordered_batch)] | 231 batch_factors <- reordered_batch[, ncol(dds_data):ncol(reordered_batch)] |
220 for (factor in colnames(batch_factors)) { | 232 for (factor in colnames(batch_factors)) { |
221 dds[[factor]] <- batch_factors[[factor]] | 233 dds[[factor]] <- batch_factors[[factor]] |
222 } | 234 } |
223 colnames(dds) <- reordered_batch[,1] | 235 colnames(dds) <- reordered_batch[, 1] |
224 return(dds) | 236 return(dds) |
225 } | 237 } |
226 | 238 |
227 if (!is.null(opt$batch_factors)) { | 239 if (!is.null(opt$batch_factors)) { |
228 batch_factors <- read.table(opt$batch_factors, sep="\t", header=T) | 240 batch_factors <- read.table(opt$batch_factors, sep = "\t", header = T) |
229 dds <- apply_batch_factors(dds = dds, batch_factors = batch_factors) | 241 dds <- apply_batch_factors(dds = dds, batch_factors = batch_factors) |
230 batch_design <- colnames(batch_factors)[-c(1,2)] | 242 batch_design <- colnames(batch_factors)[-c(1, 2)] |
231 designFormula <- as.formula(paste("~", paste(c(batch_design, rev(factors)), collapse=" + "))) | 243 design_formula <- as.formula(paste("~", paste(c(batch_design, rev(factors)), collapse = " + "))) |
232 design(dds) <- designFormula | 244 design(dds) <- design_formula |
233 } | 245 } |
234 | 246 |
235 if (verbose) { | 247 if (verbose) { |
236 cat("DESeq2 run information\n\n") | 248 cat("DESeq2 run information\n\n") |
237 cat("sample table:\n") | 249 cat("sample table:\n") |
238 print(sampleTable[,-c(1:2),drop=FALSE]) | 250 print(sample_table[, -c(1:2), drop = FALSE]) |
239 cat("\ndesign formula:\n") | 251 cat("\ndesign formula:\n") |
240 print(designFormula) | 252 print(design_formula) |
241 cat("\n\n") | 253 cat("\n\n") |
242 cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n")) | 254 cat(paste(ncol(dds), "samples with counts over", nrow(dds), "genes\n")) |
243 } | 255 } |
244 | 256 |
245 # optional outlier behavior | 257 # optional outlier behavior |
246 if (is.null(opt$outlier_replace_off)) { | 258 if (is.null(opt$outlier_replace_off)) { |
247 minRep <- 7 | 259 min_rep <- 7 |
248 } else { | 260 } else { |
249 minRep <- Inf | 261 min_rep <- Inf |
250 if (verbose) cat("outlier replacement off\n") | 262 if (verbose) cat("outlier replacement off\n") |
251 } | 263 } |
252 if (is.null(opt$outlier_filter_off)) { | 264 if (is.null(opt$outlier_filter_off)) { |
253 cooksCutoff <- TRUE | 265 cooks_cutoff <- TRUE |
254 } else { | 266 } else { |
255 cooksCutoff <- FALSE | 267 cooks_cutoff <- FALSE |
256 if (verbose) cat("outlier filtering off\n") | 268 if (verbose) cat("outlier filtering off\n") |
257 } | 269 } |
258 | 270 |
259 # optional automatic mean filtering | 271 # optional automatic mean filtering |
260 if (is.null(opt$auto_mean_filter_off)) { | 272 if (is.null(opt$auto_mean_filter_off)) { |
261 independentFiltering <- TRUE | 273 independent_filtering <- TRUE |
262 } else { | 274 } else { |
263 independentFiltering <- FALSE | 275 independent_filtering <- FALSE |
264 if (verbose) cat("automatic filtering on the mean off\n") | 276 if (verbose) cat("automatic filtering on the mean off\n") |
265 } | 277 } |
266 | 278 |
267 # shrinkage of LFCs | 279 # shrinkage of LFCs |
268 if (is.null(opt$beta_prior_off)) { | 280 if (is.null(opt$beta_prior_off)) { |
269 betaPrior <- TRUE | 281 beta_prior <- TRUE |
270 } else { | 282 } else { |
271 betaPrior <- FALSE | 283 beta_prior <- FALSE |
272 if (verbose) cat("beta prior off\n") | 284 if (verbose) cat("beta prior off\n") |
273 } | 285 } |
274 | 286 |
275 # dispersion fit type | 287 # dispersion fit type |
276 if (is.null(opt$fit_type)) { | 288 if (is.null(opt$fit_type)) { |
277 fitType <- "parametric" | 289 fit_type <- "parametric" |
278 } else { | 290 } else { |
279 fitType <- c("parametric","local","mean")[opt$fit_type] | 291 fit_type <- c("parametric", "local", "mean")[opt$fit_type] |
280 } | 292 } |
281 | 293 |
282 if (verbose) cat(paste("using disperion fit type:",fitType,"\n")) | 294 if (verbose) cat(paste("using disperion fit type:", fit_type, "\n")) |
283 | 295 |
284 # run the analysis | 296 # run the analysis |
285 dds <- DESeq(dds, fitType=fitType, betaPrior=betaPrior, minReplicatesForReplace=minRep, parallel=parallel) | 297 dds <- DESeq(dds, fitType = fit_type, betaPrior = beta_prior, minReplicatesForReplace = min_rep, parallel = parallel) |
286 | 298 |
287 # create the generic plots and leave the device open | 299 # create the generic plots and leave the device open |
288 if (!is.null(opt$plots)) { | 300 if (!is.null(opt$plots)) { |
289 if (verbose) cat("creating plots\n") | 301 if (verbose) cat("creating plots\n") |
290 pdf(opt$plots) | 302 pdf(opt$plots) |
291 generateGenericPlots(dds, factors) | 303 generate_generic_plots(dds, factors) |
292 } | 304 } |
293 | 305 |
294 n <- nlevels(colData(dds)[[primaryFactor]]) | 306 n <- nlevels(colData(dds)[[primary_factor]]) |
295 allLevels <- levels(colData(dds)[[primaryFactor]]) | 307 all_levels <- levels(colData(dds)[[primary_factor]]) |
296 | 308 |
297 if (!is.null(opt$countsfile)) { | 309 if (!is.null(opt$countsfile)) { |
298 normalizedCounts<-counts(dds,normalized=TRUE) | 310 normalized_counts <- counts(dds, normalized = TRUE) |
299 write.table(normalizedCounts, file=opt$countsfile, sep="\t", col.names=NA, quote=FALSE) | 311 write.table(normalized_counts, file = opt$countsfile, sep = "\t", col.names = NA, quote = FALSE) |
300 } | 312 } |
301 | 313 |
302 if (!is.null(opt$rlogfile)) { | 314 if (!is.null(opt$rlogfile)) { |
303 rLogNormalized <-rlogTransformation(dds) | 315 rlog_normalized <- rlogTransformation(dds) |
304 rLogNormalizedMat <- assay(rLogNormalized) | 316 rlog_normalized_mat <- assay(rlog_normalized) |
305 write.table(rLogNormalizedMat, file=opt$rlogfile, sep="\t", col.names=NA, quote=FALSE) | 317 write.table(rlog_normalized_mat, file = opt$rlogfile, sep = "\t", col.names = NA, quote = FALSE) |
306 } | 318 } |
307 | 319 |
308 if (!is.null(opt$vstfile)) { | 320 if (!is.null(opt$vstfile)) { |
309 vstNormalized<-varianceStabilizingTransformation(dds) | 321 vst_normalized <- varianceStabilizingTransformation(dds) |
310 vstNormalizedMat <- assay(vstNormalized) | 322 vst_normalized_mat <- assay(vst_normalized) |
311 write.table(vstNormalizedMat, file=opt$vstfile, sep="\t", col.names=NA, quote=FALSE) | 323 write.table(vst_normalized_mat, file = opt$vstfile, sep = "\t", col.names = NA, quote = FALSE) |
312 } | 324 } |
313 | 325 |
314 | 326 |
315 if (is.null(opt$many_contrasts)) { | 327 if (is.null(opt$many_contrasts)) { |
316 # only contrast the first and second level of the primary factor | 328 # only contrast the first and second level of the primary factor |
317 ref <- allLevels[1] | 329 ref <- all_levels[1] |
318 lvl <- allLevels[2] | 330 lvl <- all_levels[2] |
319 res <- results(dds, contrast=c(primaryFactor, lvl, ref), | 331 res <- results( |
320 cooksCutoff=cooksCutoff, | 332 dds, |
321 independentFiltering=independentFiltering) | 333 contrast = c(primary_factor, lvl, ref), |
334 cooksCutoff = cooks_cutoff, | |
335 independentFiltering = independent_filtering | |
336 ) | |
322 if (verbose) { | 337 if (verbose) { |
323 cat("summary of results\n") | 338 cat("summary of results\n") |
324 cat(paste0(primaryFactor,": ",lvl," vs ",ref,"\n")) | 339 cat(paste0(primary_factor, ": ", lvl, " vs ", ref, "\n")) |
325 print(summary(res)) | 340 print(summary(res)) |
326 } | 341 } |
327 resSorted <- res[order(res$padj),] | 342 res_sorted <- res[order(res$padj), ] |
328 outDF <- as.data.frame(resSorted) | 343 out_df <- as.data.frame(res_sorted) |
329 outDF$geneID <- rownames(outDF) | 344 out_df$geneID <- rownames(out_df) # nolint |
330 outDF <- outDF[,c("geneID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")] | 345 out_df <- out_df[, c("geneID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")] |
331 filename <- opt$outfile | 346 filename <- opt$outfile |
332 write.table(outDF, file=filename, sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE) | 347 write.table(out_df, file = filename, sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE) |
333 if (independentFiltering) { | 348 if (independent_filtering) { |
334 threshold <- unname(attr(res, "filterThreshold")) | 349 threshold <- unname(attr(res, "filterThreshold")) |
335 } else { | 350 } else { |
336 threshold <- 0 | 351 threshold <- 0 |
337 } | 352 } |
338 title_suffix <- paste0(primaryFactor,": ",lvl," vs ",ref) | 353 title_suffix <- paste0(primary_factor, ": ", lvl, " vs ", ref) |
339 if (!is.null(opt$plots)) { | 354 if (!is.null(opt$plots)) { |
340 generateSpecificPlots(res, threshold, title_suffix) | 355 generate_specific_plots(res, threshold, title_suffix) |
341 } | 356 } |
342 } else { | 357 } else { |
343 # rotate through the possible contrasts of the primary factor | 358 # rotate through the possible contrasts of the primary factor |
344 # write out a sorted table of results with the contrast as a suffix | 359 # write out a sorted table of results with the contrast as a suffix |
345 # add contrast specific plots to the device | 360 # add contrast specific plots to the device |
346 for (i in seq_len(n-1)) { | 361 for (i in seq_len(n - 1)) { |
347 ref <- allLevels[i] | 362 ref <- all_levels[i] |
348 contrastLevels <- allLevels[(i+1):n] | 363 contrast_levels <- all_levels[(i + 1):n] |
349 for (lvl in contrastLevels) { | 364 for (lvl in contrast_levels) { |
350 res <- results(dds, contrast=c(primaryFactor, lvl, ref), | 365 res <- results( |
351 cooksCutoff=cooksCutoff, | 366 dds, |
352 independentFiltering=independentFiltering) | 367 contrast = c(primary_factor, lvl, ref), |
353 resSorted <- res[order(res$padj),] | 368 cooksCutoff = cooks_cutoff, |
354 outDF <- as.data.frame(resSorted) | 369 independentFiltering = independent_filtering |
355 outDF$geneID <- rownames(outDF) | 370 ) |
356 outDF <- outDF[,c("geneID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")] | 371 res_sorted <- res[order(res$padj), ] |
357 filename <- paste0(primaryFactor,"_",lvl,"_vs_",ref) | 372 out_df <- as.data.frame(res_sorted) |
358 write.table(outDF, file=filename, sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE) | 373 out_df$geneID <- rownames(out_df) # nolint |
359 if (independentFiltering) { | 374 out_df <- out_df[, c("geneID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")] |
375 filename <- paste0(primary_factor, "_", lvl, "_vs_", ref) | |
376 write.table(out_df, file = filename, sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE) | |
377 if (independent_filtering) { | |
360 threshold <- unname(attr(res, "filterThreshold")) | 378 threshold <- unname(attr(res, "filterThreshold")) |
361 } else { | 379 } else { |
362 threshold <- 0 | 380 threshold <- 0 |
363 } | 381 } |
364 title_suffix <- paste0(primaryFactor,": ",lvl," vs ",ref) | 382 title_suffix <- paste0(primary_factor, ": ", lvl, " vs ", ref) |
365 if (!is.null(opt$plots)) { | 383 if (!is.null(opt$plots)) { |
366 generateSpecificPlots(res, threshold, title_suffix) | 384 generate_specific_plots(res, threshold, title_suffix) |
367 } | 385 } |
368 } | 386 } |
369 } | 387 } |
370 } | 388 } |
371 | 389 |
376 } | 394 } |
377 | 395 |
378 cat("Session information:\n\n") | 396 cat("Session information:\n\n") |
379 | 397 |
380 sessionInfo() | 398 sessionInfo() |
381 |