comparison edger.R @ 8:3d89af8a44f0 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edger commit 215a0f27f3de87506895ac655f801c40e8c7edbc"
author iuc
date Thu, 03 Jun 2021 19:34:18 +0000
parents 555659de7321
children 6e53e565fc6a
comparison
equal deleted inserted replaced
7:334ce9b1bac5 8:3d89af8a44f0
5 # ARGS: htmlPath", "R", 1, "character" -Path to html file linking to other outputs 5 # ARGS: htmlPath", "R", 1, "character" -Path to html file linking to other outputs
6 # outPath", "o", 1, "character" -Path to folder to write all output to 6 # outPath", "o", 1, "character" -Path to folder to write all output to
7 # filesPath", "j", 2, "character" -JSON list object if multiple files input 7 # filesPath", "j", 2, "character" -JSON list object if multiple files input
8 # matrixPath", "m", 2, "character" -Path to count matrix 8 # matrixPath", "m", 2, "character" -Path to count matrix
9 # factFile", "f", 2, "character" -Path to factor information file 9 # factFile", "f", 2, "character" -Path to factor information file
10 # factInput", "i", 2, "character" -String containing factors if manually input 10 # factInput", "i", 2, "character" -String containing factors if manually input
11 # annoPath", "a", 2, "character" -Path to input containing gene annotations 11 # annoPath", "a", 2, "character" -Path to input containing gene annotations
12 # contrastData", "C", 1, "character" -String containing contrasts of interest 12 # contrastData", "C", 1, "character" -String containing contrasts of interest
13 # cpmReq", "c", 2, "double" -Float specifying cpm requirement 13 # cpmReq", "c", 2, "double" -Float specifying cpm requirement
14 # cntReq", "z", 2, "integer" -Integer specifying minimum total count requirement 14 # cntReq", "z", 2, "integer" -Integer specifying minimum total count requirement
15 # sampleReq", "s", 2, "integer" -Integer specifying cpm requirement 15 # sampleReq", "s", 2, "integer" -Integer specifying cpm requirement
16 # normCounts", "x", 0, "logical" -String specifying if normalised counts should be output 16 # normCounts", "x", 0, "logical" -String specifying if normalised counts should be output
17 # rdaOpt", "r", 0, "logical" -String specifying if RData should be output 17 # rdaOpt", "r", 0, "logical" -String specifying if RData should be output
18 # lfcReq", "l", 1, "double" -Float specifying the log-fold-change requirement 18 # lfcReq", "l", 1, "double" -Float specifying the log-fold-change requirement
19 # pValReq", "p", 1, "double" -Float specifying the p-value requirement 19 # pValReq", "p", 1, "double" -Float specifying the p-value requirement
20 # pAdjOpt", "d", 1, "character" -String specifying the p-value adjustment method 20 # pAdjOpt", "d", 1, "character" -String specifying the p-value adjustment method
21 # normOpt", "n", 1, "character" -String specifying type of normalisation used 21 # normOpt", "n", 1, "character" -String specifying type of normalisation used
22 # robOpt", "b", 0, "logical" -String specifying if robust options should be used 22 # robOpt", "b", 0, "logical" -String specifying if robust options should be used
23 # lrtOpt", "t", 0, "logical" -String specifying whether to perform LRT test instead 23 # lrtOpt", "t", 0, "logical" -String specifying whether to perform LRT test instead
24 # 24 #
25 # OUT: 25 # OUT:
26 # MDS Plot 26 # MDS Plot
27 # BCV Plot 27 # BCV Plot
28 # QL Plot 28 # QL Plot
29 # MD Plot 29 # MD Plot
30 # Expression Table 30 # Expression Table
31 # HTML file linking to the ouputs 31 # HTML file linking to the ouputs
35 # 35 #
36 # Author: Shian Su - registertonysu@gmail.com - Jan 2014 36 # Author: Shian Su - registertonysu@gmail.com - Jan 2014
37 # Modified by: Maria Doyle - Oct 2017 (some code taken from the DESeq2 wrapper) 37 # Modified by: Maria Doyle - Oct 2017 (some code taken from the DESeq2 wrapper)
38 38
39 # Record starting time 39 # Record starting time
40 timeStart <- as.character(Sys.time()) 40 time_start <- as.character(Sys.time())
41 41
42 # setup R error handling to go to stderr 42 # setup R error handling to go to stderr
43 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) 43 options(show.error.messages = F, error = function() {
44 cat(geterrmessage(), file = stderr())
45 q("no", 1, F)
46 })
44 47
45 # we need that to not crash galaxy with an UTF8 error on German LC settings. 48 # we need that to not crash galaxy with an UTF8 error on German LC settings.
46 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 49 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
47 50
48 # Load all required libraries 51 # Load all required libraries
49 library(methods, quietly=TRUE, warn.conflicts=FALSE) 52 library(methods, quietly = TRUE, warn.conflicts = FALSE)
50 library(statmod, quietly=TRUE, warn.conflicts=FALSE) 53 library(statmod, quietly = TRUE, warn.conflicts = FALSE)
51 library(splines, quietly=TRUE, warn.conflicts=FALSE) 54 library(splines, quietly = TRUE, warn.conflicts = FALSE)
52 library(edgeR, quietly=TRUE, warn.conflicts=FALSE) 55 library(edgeR, quietly = TRUE, warn.conflicts = FALSE)
53 library(limma, quietly=TRUE, warn.conflicts=FALSE) 56 library(limma, quietly = TRUE, warn.conflicts = FALSE)
54 library(scales, quietly=TRUE, warn.conflicts=FALSE) 57 library(scales, quietly = TRUE, warn.conflicts = FALSE)
55 library(getopt, quietly=TRUE, warn.conflicts=FALSE) 58 library(getopt, quietly = TRUE, warn.conflicts = FALSE)
56 59
57 ################################################################################ 60 ################################################################################
58 ### Function Delcaration 61 ### Function Delcaration
59 ################################################################################ 62 ################################################################################
60 # Function to sanitise contrast equations so there are no whitespaces 63 # Function to sanitise contrast equations so there are no whitespaces
61 # surrounding the arithmetic operators, leading or trailing whitespace 64 # surrounding the arithmetic operators, leading or trailing whitespace
62 sanitiseEquation <- function(equation) { 65 sanitise_equation <- function(equation) {
63 equation <- gsub(" *[+] *", "+", equation) 66 equation <- gsub(" *[+] *", "+", equation)
64 equation <- gsub(" *[-] *", "-", equation) 67 equation <- gsub(" *[-] *", "-", equation)
65 equation <- gsub(" *[/] *", "/", equation) 68 equation <- gsub(" *[/] *", "/", equation)
66 equation <- gsub(" *[*] *", "*", equation) 69 equation <- gsub(" *[*] *", "*", equation)
67 equation <- gsub("^\\s+|\\s+$", "", equation) 70 equation <- gsub("^\\s+|\\s+$", "", equation)
68 return(equation) 71 return(equation)
69 } 72 }
70 73
71 # Function to sanitise group information 74 # Function to sanitise group information
72 sanitiseGroups <- function(string) { 75 sanitise_groups <- function(string) {
73 string <- gsub(" *[,] *", ",", string) 76 string <- gsub(" *[,] *", ",", string)
74 string <- gsub("^\\s+|\\s+$", "", string) 77 string <- gsub("^\\s+|\\s+$", "", string)
75 return(string) 78 return(string)
76 } 79 }
77 80
78 # Function to change periods to whitespace in a string 81 # Function to change periods to whitespace in a string
79 unmake.names <- function(string) { 82 unmake_names <- function(string) {
80 string <- gsub(".", " ", string, fixed=TRUE) 83 string <- gsub(".", " ", string, fixed = TRUE)
81 return(string) 84 return(string)
82 } 85 }
83 86
84 # Generate output folder and paths 87 # Generate output folder and paths
85 makeOut <- function(filename) { 88 make_out <- function(filename) {
86 return(paste0(opt$outPath, "/", filename)) 89 return(paste0(out_path, "/", filename))
87 } 90 }
88 91
89 # Generating design information 92 # Generating design information
90 pasteListName <- function(string) { 93 paste_listname <- function(string) {
91 return(paste0("factors$", string)) 94 return(paste0("factors$", string))
92 } 95 }
93 96
94 # Create cata function: default path set, default seperator empty and appending 97 # Create cata function: default path set, default seperator empty and appending
95 # true by default (Ripped straight from the cat function with altered argument 98 # true by default (Ripped straight from the cat function with altered argument
96 # defaults) 99 # defaults)
97 cata <- function(..., file=opt$htmlPath, sep="", fill=FALSE, labels=NULL, 100 cata <- function(..., file = opt$htmlPath, sep = "", fill = FALSE, labels = NULL,
98 append=TRUE) { 101 append = TRUE) {
99 if (is.character(file)) 102 if (is.character(file)) {
100 if (file == "") 103 if (file == "") {
101 file <- stdout() 104 file <- stdout()
102 else if (substring(file, 1L, 1L) == "|") { 105 } else if (substring(file, 1L, 1L) == "|") {
103 file <- pipe(substring(file, 2L), "w") 106 file <- pipe(substring(file, 2L), "w")
104 on.exit(close(file)) 107 on.exit(close(file))
105 } 108 }
106 else { 109 else {
107 file <- file(file, ifelse(append, "a", "w")) 110 file <- file(file, ifelse(append, "a", "w"))
108 on.exit(close(file)) 111 on.exit(close(file))
109 } 112 }
110 .Internal(cat(list(...), file, sep, fill, labels, append)) 113 }
114 .Internal(cat(list(...), file, sep, fill, labels, append))
111 } 115 }
112 116
113 # Function to write code for html head and title 117 # Function to write code for html head and title
114 HtmlHead <- function(title) { 118 html_head <- function(title) {
115 cata("<head>\n") 119 cata("<head>\n")
116 cata("<title>", title, "</title>\n") 120 cata("<title>", title, "</title>\n")
117 cata("</head>\n") 121 cata("</head>\n")
118 } 122 }
119 123
120 # Function to write code for html links 124 # Function to write code for html links
121 HtmlLink <- function(address, label=address) { 125 html_link <- function(address, label = address) {
122 cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n") 126 cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n")
123 } 127 }
124 128
125 # Function to write code for html images 129 # Function to write code for html images
126 HtmlImage <- function(source, label=source, height=600, width=600) { 130 html_image <- function(source, label = source, height = 600, width = 600) {
127 cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height) 131 cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height)
128 cata("\" width=\"", width, "\"/>\n") 132 cata("\" width=\"", width, "\"/>\n")
129 } 133 }
130 134
131 # Function to write code for html list items 135 # Function to write code for html list items
132 ListItem <- function(...) { 136 list_item <- function(...) {
133 cata("<li>", ..., "</li>\n") 137 cata("<li>", ..., "</li>\n")
134 } 138 }
135 139
136 TableItem <- function(...) { 140 table_item <- function(...) {
137 cata("<td>", ..., "</td>\n") 141 cata("<td>", ..., "</td>\n")
138 } 142 }
139 143
140 TableHeadItem <- function(...) { 144 table_head_item <- function(...) {
141 cata("<th>", ..., "</th>\n") 145 cata("<th>", ..., "</th>\n")
142 } 146 }
143 147
144 ################################################################################ 148 ################################################################################
145 ### Input Processing 149 ### Input Processing
146 ################################################################################ 150 ################################################################################
147 151
148 # Collect arguments from command line 152 # Collect arguments from command line
149 args <- commandArgs(trailingOnly=TRUE) 153 args <- commandArgs(trailingOnly = TRUE)
150 154
151 # Get options, using the spec as defined by the enclosed list. 155 # Get options, using the spec as defined by the enclosed list.
152 # Read the options from the default: commandArgs(TRUE). 156 # Read the options from the default: commandArgs(TRUE).
153 spec <- matrix(c( 157 spec <- matrix(c(
154 "htmlPath", "R", 1, "character", 158 "htmlPath", "R", 1, "character",
155 "outPath", "o", 1, "character", 159 "outPath", "o", 1, "character",
156 "filesPath", "j", 2, "character", 160 "filesPath", "j", 2, "character",
157 "matrixPath", "m", 2, "character", 161 "matrixPath", "m", 2, "character",
158 "factFile", "f", 2, "character", 162 "factFile", "f", 2, "character",
159 "factInput", "i", 2, "character", 163 "factInput", "i", 2, "character",
160 "annoPath", "a", 2, "character", 164 "annoPath", "a", 2, "character",
161 "contrastData", "C", 1, "character", 165 "contrastData", "C", 1, "character",
162 "cpmReq", "c", 1, "double", 166 "cpmReq", "c", 1, "double",
163 "totReq", "y", 0, "logical", 167 "totReq", "y", 0, "logical",
164 "cntReq", "z", 1, "integer", 168 "cntReq", "z", 1, "integer",
165 "sampleReq", "s", 1, "integer", 169 "sampleReq", "s", 1, "integer",
166 "normCounts", "x", 0, "logical", 170 "normCounts", "x", 0, "logical",
167 "rdaOpt", "r", 0, "logical", 171 "rdaOpt", "r", 0, "logical",
168 "lfcReq", "l", 1, "double", 172 "lfcReq", "l", 1, "double",
169 "pValReq", "p", 1, "double", 173 "pValReq", "p", 1, "double",
170 "pAdjOpt", "d", 1, "character", 174 "pAdjOpt", "d", 1, "character",
171 "normOpt", "n", 1, "character", 175 "normOpt", "n", 1, "character",
172 "robOpt", "b", 0, "logical", 176 "robOpt", "b", 0, "logical",
173 "lrtOpt", "t", 0, "logical"), 177 "lrtOpt", "t", 0, "logical"
174 byrow=TRUE, ncol=4) 178 ),
179 byrow = TRUE, ncol = 4
180 )
175 opt <- getopt(spec) 181 opt <- getopt(spec)
176 182
177 183
178 if (is.null(opt$matrixPath) & is.null(opt$filesPath)) { 184 if (is.null(opt$matrixPath) & is.null(opt$filesPath)) {
179 cat("A counts matrix (or a set of counts files) is required.\n") 185 cat("A counts matrix (or a set of counts files) is required.\n")
180 q(status=1) 186 q(status = 1)
181 } 187 }
182 188
183 if (is.null(opt$cpmReq)) { 189 if (is.null(opt$cpmReq)) {
184 filtCPM <- FALSE 190 filt_cpm <- FALSE
185 } else { 191 } else {
186 filtCPM <- TRUE 192 filt_cpm <- TRUE
187 } 193 }
188 194
189 if (is.null(opt$cntReq) || is.null(opt$sampleReq)) { 195 if (is.null(opt$cntReq) || is.null(opt$sampleReq)) {
190 filtSmpCount <- FALSE 196 filt_smpcount <- FALSE
191 } else { 197 } else {
192 filtSmpCount <- TRUE 198 filt_smpcount <- TRUE
193 } 199 }
194 200
195 if (is.null(opt$totReq)) { 201 if (is.null(opt$totReq)) {
196 filtTotCount <- FALSE 202 filt_totcount <- FALSE
197 } else { 203 } else {
198 filtTotCount <- TRUE 204 filt_totcount <- TRUE
199 } 205 }
200 206
201 if (is.null(opt$lrtOpt)) { 207 if (is.null(opt$lrtOpt)) {
202 wantLRT <- FALSE 208 want_lrt <- FALSE
203 } else { 209 } else {
204 wantLRT <- TRUE 210 want_lrt <- TRUE
205 } 211 }
206 212
207 if (is.null(opt$rdaOpt)) { 213 if (is.null(opt$rdaOpt)) {
208 wantRda <- FALSE 214 want_rda <- FALSE
209 } else { 215 } else {
210 wantRda <- TRUE 216 want_rda <- TRUE
211 } 217 }
212 218
213 if (is.null(opt$annoPath)) { 219 if (is.null(opt$annoPath)) {
214 haveAnno <- FALSE 220 have_anno <- FALSE
215 } else { 221 } else {
216 haveAnno <- TRUE 222 have_anno <- TRUE
217 } 223 }
218 224
219 if (is.null(opt$normCounts)) { 225 if (is.null(opt$normCounts)) {
220 wantNorm <- FALSE 226 want_norm <- FALSE
221 } else { 227 } else {
222 wantNorm <- TRUE 228 want_norm <- TRUE
223 } 229 }
224 230
225 if (is.null(opt$robOpt)) { 231 if (is.null(opt$robOpt)) {
226 wantRobust <- FALSE 232 want_robust <- FALSE
227 } else { 233 } else {
228 wantRobust <- TRUE 234 want_robust <- TRUE
229 } 235 }
230 236
231 237
232 if (!is.null(opt$filesPath)) { 238 if (!is.null(opt$filesPath)) {
233 # Process the separate count files (adapted from DESeq2 wrapper) 239 # Process the separate count files (adapted from DESeq2 wrapper)
234 library("rjson") 240 library("rjson")
235 parser <- newJSONParser() 241 parser <- newJSONParser()
236 parser$addData(opt$filesPath) 242 parser$addData(opt$filesPath)
237 factorList <- parser$getObject() 243 factor_list <- parser$getObject()
238 factors <- sapply(factorList, function(x) x[[1]]) 244 factors <- sapply(factor_list, function(x) x[[1]])
239 filenamesIn <- unname(unlist(factorList[[1]][[2]])) 245 filenames_in <- unname(unlist(factor_list[[1]][[2]]))
240 sampleTable <- data.frame(sample=basename(filenamesIn), 246 sampletable <- data.frame(
241 filename=filenamesIn, 247 sample = basename(filenames_in),
242 row.names=filenamesIn, 248 filename = filenames_in,
243 stringsAsFactors=FALSE) 249 row.names = filenames_in,
244 for (factor in factorList) { 250 stringsAsFactors = FALSE
245 factorName <- factor[[1]] 251 )
246 sampleTable[[factorName]] <- character(nrow(sampleTable)) 252 for (factor in factor_list) {
247 lvls <- sapply(factor[[2]], function(x) names(x)) 253 factorname <- factor[[1]]
248 for (i in seq_along(factor[[2]])) { 254 sampletable[[factorname]] <- character(nrow(sampletable))
249 files <- factor[[2]][[i]][[1]] 255 lvls <- sapply(factor[[2]], function(x) names(x))
250 sampleTable[files,factorName] <- lvls[i] 256 for (i in seq_along(factor[[2]])) {
251 } 257 files <- factor[[2]][[i]][[1]]
252 sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls) 258 sampletable[files, factorname] <- lvls[i]
253 } 259 }
254 rownames(sampleTable) <- sampleTable$sample 260 sampletable[[factorname]] <- factor(sampletable[[factorname]], levels = lvls)
255 rem <- c("sample","filename") 261 }
256 factors <- sampleTable[, !(names(sampleTable) %in% rem), drop=FALSE] 262 rownames(sampletable) <- sampletable$sample
257 263 rem <- c("sample", "filename")
258 #read in count files and create single table 264 factors <- sampletable[, !(names(sampletable) %in% rem), drop = FALSE]
259 countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)}) 265
260 counts <- do.call("cbind", countfiles) 266 # read in count files and create single table
261 267 countfiles <- lapply(sampletable$filename, function(x) {
262 } else { 268 read.delim(x, row.names = 1)
263 # Process the single count matrix 269 })
264 counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", strip.white=TRUE, stringsAsFactors=FALSE) 270 counts <- do.call("cbind", countfiles)
265 row.names(counts) <- counts[, 1] 271 } else {
266 counts <- counts[ , -1] 272 # Process the single count matrix
267 countsRows <- nrow(counts) 273 counts <- read.table(opt$matrixPath, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = FALSE)
268 274 row.names(counts) <- counts[, 1]
269 # Process factors 275 counts <- counts[, -1]
270 if (is.null(opt$factInput)) { 276 countsrows <- nrow(counts)
271 factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE) 277
272 # check samples names match 278 # Process factors
273 if(!any(factorData[, 1] %in% colnames(counts))) 279 if (is.null(opt$factInput)) {
274 stop("Sample IDs in factors file and count matrix don't match") 280 factordata <- read.table(opt$factFile, header = TRUE, sep = "\t", strip.white = TRUE)
275 # order samples as in counts matrix 281 # check samples names match
276 factorData <- factorData[match(colnames(counts), factorData[, 1]), ] 282 if (!any(factordata[, 1] %in% colnames(counts))) {
277 factors <- factorData[, -1, drop=FALSE] 283 stop("Sample IDs in factors file and count matrix don't match")
278 } else {
279 factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE))
280 factorData <- list()
281 for (fact in factors) {
282 newFact <- unlist(strsplit(fact, split="::"))
283 factorData <- rbind(factorData, newFact)
284 } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
285
286 # Set the row names to be the name of the factor and delete first row
287 row.names(factorData) <- factorData[, 1]
288 factorData <- factorData[, -1]
289 factorData <- sapply(factorData, sanitiseGroups)
290 factorData <- sapply(factorData, strsplit, split=",")
291 factorData <- sapply(factorData, make.names)
292 # Transform factor data into data frame of R factor objects
293 factors <- data.frame(factorData)
294 } 284 }
295 } 285 # order samples as in counts matrix
296 286 factordata <- factordata[match(colnames(counts), factordata[, 1]), ]
297 # if annotation file provided 287 factors <- factordata[, -1, drop = FALSE]
298 if (haveAnno) { 288 } else {
299 geneanno <- read.table(opt$annoPath, header=TRUE, sep="\t", quote= "", strip.white=TRUE, stringsAsFactors=FALSE) 289 factors <- unlist(strsplit(opt$factInput, "|", fixed = TRUE))
300 } 290 factordata <- list()
301 291 for (fact in factors) {
302 #Create output directory 292 newfact <- unlist(strsplit(fact, split = "::"))
303 dir.create(opt$outPath, showWarnings=FALSE) 293 factordata <- rbind(factordata, newfact)
294 } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
295
296 # Set the row names to be the name of the factor and delete first row
297 row.names(factordata) <- factordata[, 1]
298 factordata <- factordata[, -1]
299 factordata <- sapply(factordata, sanitise_groups)
300 factordata <- sapply(factordata, strsplit, split = ",")
301 factordata <- sapply(factordata, make.names)
302 # Transform factor data into data frame of R factor objects
303 factors <- data.frame(factordata)
304 }
305 }
306
307 # if annotation file provided
308 if (have_anno) {
309 geneanno <- read.table(opt$annoPath, header = TRUE, sep = "\t", quote = "", strip.white = TRUE, stringsAsFactors = FALSE)
310 }
311
312 # Create output directory
313 out_path <- opt$outPath
314 dir.create(out_path, showWarnings = FALSE)
304 315
305 # Split up contrasts separated by comma into a vector then sanitise 316 # Split up contrasts separated by comma into a vector then sanitise
306 contrastData <- unlist(strsplit(opt$contrastData, split=",")) 317 contrast_data <- unlist(strsplit(opt$contrastData, split = ","))
307 contrastData <- sanitiseEquation(contrastData) 318 contrast_data <- sanitise_equation(contrast_data)
308 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE) 319 contrast_data <- gsub(" ", ".", contrast_data, fixed = TRUE)
309 320
310 bcvOutPdf <- makeOut("bcvplot.pdf") 321 bcv_pdf <- make_out("bcvplot.pdf")
311 bcvOutPng <- makeOut("bcvplot.png") 322 bcv_png <- make_out("bcvplot.png")
312 qlOutPdf <- makeOut("qlplot.pdf") 323 ql_pdf <- make_out("qlplot.pdf")
313 qlOutPng <- makeOut("qlplot.png") 324 ql_png <- make_out("qlplot.png")
314 mdsOutPdf <- character() # Initialise character vector 325 mds_pdf <- character() # Initialise character vector
315 mdsOutPng <- character() 326 mds_png <- character()
316 for (i in 1:ncol(factors)) { 327 for (i in seq_len(ncol(factors))) {
317 mdsOutPdf[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".pdf")) 328 mds_pdf[i] <- make_out(paste0("mdsplot_", names(factors)[i], ".pdf"))
318 mdsOutPng[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".png")) 329 mds_png[i] <- make_out(paste0("mdsplot_", names(factors)[i], ".png"))
319 } 330 }
320 mdOutPdf <- character() 331 md_pdf <- character()
321 mdOutPng <- character() 332 md_png <- character()
322 topOut <- character() 333 top_out <- character()
323 for (i in 1:length(contrastData)) { 334 for (i in seq_along(contrast_data)) {
324 mdOutPdf[i] <- makeOut(paste0("mdplot_", contrastData[i], ".pdf")) 335 md_pdf[i] <- make_out(paste0("mdplot_", contrast_data[i], ".pdf"))
325 mdOutPng[i] <- makeOut(paste0("mdplot_", contrastData[i], ".png")) 336 md_png[i] <- make_out(paste0("mdplot_", contrast_data[i], ".png"))
326 topOut[i] <- makeOut(paste0("edgeR_", contrastData[i], ".tsv")) 337 top_out[i] <- make_out(paste0("edgeR_", contrast_data[i], ".tsv"))
327 } # Save output paths for each contrast as vectors 338 } # Save output paths for each contrast as vectors
328 normOut <- makeOut("edgeR_normcounts.tsv") 339 norm_out <- make_out("edgeR_normcounts.tsv")
329 rdaOut <- makeOut("edgeR_analysis.RData") 340 rda_out <- make_out("edgeR_analysis.RData")
330 sessionOut <- makeOut("session_info.txt") 341 session_out <- make_out("session_info.txt")
331 342
332 # Initialise data for html links and images, data frame with columns Label and 343 # Initialise data for html links and images, data frame with columns Label and
333 # Link 344 # Link
334 linkData <- data.frame(Label=character(), Link=character(), stringsAsFactors=FALSE) 345 link_data <- data.frame(Label = character(), Link = character(), stringsAsFactors = FALSE)
335 imageData <- data.frame(Label=character(), Link=character(), stringsAsFactors=FALSE) 346 image_data <- data.frame(Label = character(), Link = character(), stringsAsFactors = FALSE)
336 347
337 # Initialise vectors for storage of up/down/neutral regulated counts 348 # Initialise vectors for storage of up/down/neutral regulated counts
338 upCount <- numeric() 349 up_count <- numeric()
339 downCount <- numeric() 350 down_count <- numeric()
340 flatCount <- numeric() 351 flat_count <- numeric()
341 352
342 ################################################################################ 353 ################################################################################
343 ### Data Processing 354 ### Data Processing
344 ################################################################################ 355 ################################################################################
345 356
346 # Extract counts and annotation data 357 # Extract counts and annotation data
347 data <- list() 358 data <- list()
348 data$counts <- counts 359 data$counts <- counts
349 if (haveAnno) { 360 if (have_anno) {
350 # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno) 361 # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno)
351 annoord <- geneanno[match(row.names(counts), geneanno[,1]), ] 362 annoord <- geneanno[match(row.names(counts), geneanno[, 1]), ]
352 data$genes <- annoord 363 data$genes <- annoord
353 } else { 364 } else {
354 data$genes <- data.frame(GeneID=row.names(counts)) 365 data$genes <- data.frame(GeneID = row.names(counts))
355 } 366 }
356 367
357 # If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of 368 # If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of
358 # samples. Default is no filtering 369 # samples. Default is no filtering
359 preFilterCount <- nrow(data$counts) 370 prefilter_count <- nrow(data$counts)
360 371
361 if (filtCPM || filtSmpCount || filtTotCount) { 372 if (filt_cpm || filt_smpcount || filt_totcount) {
362 373 if (filt_totcount) {
363 if (filtTotCount) { 374 keep <- rowSums(data$counts) >= opt$cntReq
364 keep <- rowSums(data$counts) >= opt$cntReq 375 } else if (filt_smpcount) {
365 } else if (filtSmpCount) { 376 keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
366 keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq 377 } else if (filt_cpm) {
367 } else if (filtCPM) { 378 keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
368 keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq 379 }
369 } 380
370 381 data$counts <- data$counts[keep, ]
371 data$counts <- data$counts[keep, ] 382 data$genes <- data$genes[keep, , drop = FALSE]
372 data$genes <- data$genes[keep, , drop=FALSE] 383 }
373 } 384
374 385 postfilter_count <- nrow(data$counts)
375 postFilterCount <- nrow(data$counts) 386 filtered_count <- prefilter_count - postfilter_count
376 filteredCount <- preFilterCount-postFilterCount
377
378 # Creating naming data
379 samplenames <- colnames(data$counts)
380 sampleanno <- data.frame("sampleID"=samplenames, factors)
381
382
383 # Generating the DGEList object "data"
384 data$samples <- sampleanno
385 data$samples$lib.size <- colSums(data$counts)
386 data$samples$norm.factors <- 1
387 row.names(data$samples) <- colnames(data$counts)
388 data <- new("DGEList", data)
389 387
390 # Name rows of factors according to their sample 388 # Name rows of factors according to their sample
391 row.names(factors) <- names(data$counts) 389 row.names(factors) <- names(data$counts)
392 factorList <- sapply(names(factors), pasteListName) 390 factor_list <- sapply(names(factors), paste_listname)
393 391
394 formula <- "~0" 392 # Generating the DGEList object "data"
395 for (i in 1:length(factorList)) { 393 samplenames <- colnames(data$counts)
396 formula <- paste(formula, factorList[i], sep="+") 394 genes <- data$genes
395 data <- DGEList(data$counts)
396 colnames(data) <- samplenames
397 data$samples <- factors
398 data$genes <- genes
399
400
401
402 formula <- "~0"
403 for (i in seq_along(factor_list)) {
404 formula <- paste(formula, factor_list[i], sep = "+")
397 } 405 }
398 406
399 formula <- formula(formula) 407 formula <- formula(formula)
400 design <- model.matrix(formula) 408 design <- model.matrix(formula)
401 409
402 for (i in 1:length(factorList)) { 410 for (i in seq_along(factor_list)) {
403 colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE) 411 colnames(design) <- gsub(factor_list[i], "", colnames(design), fixed = TRUE)
404 } 412 }
405 413
406 # Calculating normalising factor, estimating dispersion 414 # Calculating normalising factor, estimating dispersion
407 data <- calcNormFactors(data, method=opt$normOpt) 415 data <- calcNormFactors(data, method = opt$normOpt)
408 416
409 if (wantRobust) { 417 if (want_robust) {
410 data <- estimateDisp(data, design=design, robust=TRUE) 418 data <- estimateDisp(data, design = design, robust = TRUE)
411 } else { 419 } else {
412 data <- estimateDisp(data, design=design) 420 data <- estimateDisp(data, design = design)
413 } 421 }
414 422
415 # Generate contrasts information 423 # Generate contrasts information
416 contrasts <- makeContrasts(contrasts=contrastData, levels=design) 424 contrasts <- makeContrasts(contrasts = contrast_data, levels = design)
417 425
418 ################################################################################ 426 ################################################################################
419 ### Data Output 427 ### Data Output
420 ################################################################################ 428 ################################################################################
421 429
422 # Plot MDS 430 # Plot MDS
423 labels <- names(counts) 431 labels <- names(counts)
424 432
425 # MDS plot 433 # MDS plot
426 png(mdsOutPng, width=600, height=600) 434 png(mds_png, width = 600, height = 600)
427 plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1])) 435 plotMDS(data, labels = labels, col = as.numeric(factors[, 1]), cex = 0.8, main = paste("MDS Plot:", names(factors)[1]))
428 imgName <- paste0("MDS Plot_", names(factors)[1], ".png") 436 img_name <- paste0("MDS Plot_", names(factors)[1], ".png")
429 imgAddr <- paste0("mdsplot_", names(factors)[1], ".png") 437 img_addr <- paste0("mdsplot_", names(factors)[1], ".png")
430 imageData[1, ] <- c(imgName, imgAddr) 438 image_data[1, ] <- c(img_name, img_addr)
431 invisible(dev.off()) 439 invisible(dev.off())
432 440
433 pdf(mdsOutPdf) 441 pdf(mds_pdf)
434 plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1])) 442 plotMDS(data, labels = labels, col = as.numeric(factors[, 1]), cex = 0.8, main = paste("MDS Plot:", names(factors)[1]))
435 linkName <- paste0("MDS Plot_", names(factors)[1], ".pdf") 443 link_name <- paste0("MDS Plot_", names(factors)[1], ".pdf")
436 linkAddr <- paste0("mdsplot_", names(factors)[1], ".pdf") 444 link_addr <- paste0("mdsplot_", names(factors)[1], ".pdf")
437 linkData[1, ] <- c(linkName, linkAddr) 445 link_data[1, ] <- c(link_name, link_addr)
438 invisible(dev.off()) 446 invisible(dev.off())
439 447
440 # If additional factors create additional MDS plots coloured by factor 448 # If additional factors create additional MDS plots coloured by factor
441 if (ncol(factors) > 1) { 449 if (ncol(factors) > 1) {
442 for (i in 2:ncol(factors)) { 450 for (i in 2:ncol(factors)) {
443 png(mdsOutPng[i], width=600, height=600) 451 png(mds_png[i], width = 600, height = 600)
444 plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i])) 452 plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
445 imgName <- paste0("MDS Plot_", names(factors)[i], ".png") 453 img_name <- paste0("MDS Plot_", names(factors)[i], ".png")
446 imgAddr <- paste0("mdsplot_", names(factors)[i], ".png") 454 img_addr <- paste0("mdsplot_", names(factors)[i], ".png")
447 imageData <- rbind(imageData, c(imgName, imgAddr)) 455 image_data <- rbind(image_data, c(img_name, img_addr))
448 invisible(dev.off()) 456 invisible(dev.off())
449 457
450 pdf(mdsOutPdf[i]) 458 pdf(mds_pdf[i])
451 plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i])) 459 plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
452 linkName <- paste0("MDS Plot_", names(factors)[i], ".pdf") 460 link_name <- paste0("MDS Plot_", names(factors)[i], ".pdf")
453 linkAddr <- paste0("mdsplot_", names(factors)[i], ".pdf") 461 link_addr <- paste0("mdsplot_", names(factors)[i], ".pdf")
454 linkData <- rbind(linkData, c(linkName, linkAddr)) 462 link_data <- rbind(link_data, c(link_name, link_addr))
455 invisible(dev.off()) 463 invisible(dev.off())
456 } 464 }
457 } 465 }
458 466
459 # BCV Plot 467 # BCV Plot
460 png(bcvOutPng, width=600, height=600) 468 png(bcv_png, width = 600, height = 600)
461 plotBCV(data, main="BCV Plot") 469 plotBCV(data, main = "BCV Plot")
462 imgName <- "BCV Plot" 470 img_name <- "BCV Plot"
463 imgAddr <- "bcvplot.png" 471 img_addr <- "bcvplot.png"
464 imageData <- rbind(imageData, c(imgName, imgAddr)) 472 image_data <- rbind(image_data, c(img_name, img_addr))
465 invisible(dev.off()) 473 invisible(dev.off())
466 474
467 pdf(bcvOutPdf) 475 pdf(bcv_pdf)
468 plotBCV(data, main="BCV Plot") 476 plotBCV(data, main = "BCV Plot")
469 linkName <- paste0("BCV Plot.pdf") 477 link_name <- paste0("BCV Plot.pdf")
470 linkAddr <- paste0("bcvplot.pdf") 478 link_addr <- paste0("bcvplot.pdf")
471 linkData <- rbind(linkData, c(linkName, linkAddr)) 479 link_data <- rbind(link_data, c(link_name, link_addr))
472 invisible(dev.off()) 480 invisible(dev.off())
473 481
474 # Generate fit 482 # Generate fit
475 if (wantLRT) { 483 if (want_lrt) {
476 484 fit <- glmFit(data, design)
477 fit <- glmFit(data, design) 485 } else {
478 486 if (want_robust) {
479 } else { 487 fit <- glmQLFit(data, design, robust = TRUE)
480 488 } else {
481 if (wantRobust) { 489 fit <- glmQLFit(data, design)
482 fit <- glmQLFit(data, design, robust=TRUE) 490 }
483 } else { 491
484 fit <- glmQLFit(data, design) 492 # Plot QL dispersions
485 } 493 png(ql_png, width = 600, height = 600)
486 494 plotQLDisp(fit, main = "QL Plot")
487 # Plot QL dispersions 495 img_name <- "QL Plot"
488 png(qlOutPng, width=600, height=600) 496 img_addr <- "qlplot.png"
489 plotQLDisp(fit, main="QL Plot") 497 image_data <- rbind(image_data, c(img_name, img_addr))
490 imgName <- "QL Plot" 498 invisible(dev.off())
491 imgAddr <- "qlplot.png" 499
492 imageData <- rbind(imageData, c(imgName, imgAddr)) 500 pdf(ql_pdf)
493 invisible(dev.off()) 501 plotQLDisp(fit, main = "QL Plot")
494 502 link_name <- "QL Plot.pdf"
495 pdf(qlOutPdf) 503 link_addr <- "qlplot.pdf"
496 plotQLDisp(fit, main="QL Plot") 504 link_data <- rbind(link_data, c(link_name, link_addr))
497 linkName <- "QL Plot.pdf" 505 invisible(dev.off())
498 linkAddr <- "qlplot.pdf" 506 }
499 linkData <- rbind(linkData, c(linkName, linkAddr)) 507
500 invisible(dev.off()) 508 # Save normalised counts (log2cpm)
501 } 509 if (want_norm) {
502 510 normalised_counts <- cpm(data, normalized.lib.sizes = TRUE, log = TRUE)
503 # Save normalised counts (log2cpm) 511 normalised_counts <- data.frame(data$genes, normalised_counts)
504 if (wantNorm) { 512 write.table(normalised_counts, file = norm_out, row.names = FALSE, sep = "\t", quote = FALSE)
505 normalisedCounts <- cpm(data, normalized.lib.sizes=TRUE, log=TRUE) 513 link_data <- rbind(link_data, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv"))
506 normalisedCounts <- data.frame(data$genes, normalisedCounts) 514 }
507 write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE) 515
508 linkData <- rbind(linkData, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv")) 516
509 } 517 for (i in seq_along(contrast_data)) {
510 518 if (want_lrt) {
511 519 res <- glmLRT(fit, contrast = contrasts[, i])
512 for (i in 1:length(contrastData)) { 520 } else {
513 if (wantLRT) { 521 res <- glmQLFTest(fit, contrast = contrasts[, i])
514 res <- glmLRT(fit, contrast=contrasts[, i]) 522 }
515 } else { 523
516 res <- glmQLFTest(fit, contrast=contrasts[, i]) 524 status <- decideTestsDGE(res,
517 } 525 adjust.method = opt$pAdjOpt, p.value = opt$pValReq,
518 526 lfc = opt$lfcReq
519 status = decideTestsDGE(res, adjust.method=opt$pAdjOpt, p.value=opt$pValReq, 527 )
520 lfc=opt$lfcReq) 528 sum_status <- summary(status)
521 sumStatus <- summary(status) 529
522 530 # Collect counts for differential expression
523 # Collect counts for differential expression 531 up_count[i] <- sum_status["Up", ]
524 upCount[i] <- sumStatus["Up", ] 532 down_count[i] <- sum_status["Down", ]
525 downCount[i] <- sumStatus["Down", ] 533 flat_count[i] <- sum_status["NotSig", ]
526 flatCount[i] <- sumStatus["NotSig", ] 534
527 535 # Write top expressions table
528 # Write top expressions table 536 top <- topTags(res, adjust.method = opt$pAdjOpt, n = Inf, sort.by = "PValue")
529 top <- topTags(res, adjust.method=opt$pAdjOpt, n=Inf, sort.by="PValue") 537 write.table(top, file = top_out[i], row.names = FALSE, sep = "\t", quote = FALSE)
530 write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE) 538
531 539 link_name <- paste0("edgeR_", contrast_data[i], ".tsv")
532 linkName <- paste0("edgeR_", contrastData[i], ".tsv") 540 link_addr <- paste0("edgeR_", contrast_data[i], ".tsv")
533 linkAddr <- paste0("edgeR_", contrastData[i], ".tsv") 541 link_data <- rbind(link_data, c(link_name, link_addr))
534 linkData <- rbind(linkData, c(linkName, linkAddr)) 542
535 543 # Plot MD (log ratios vs mean difference) using limma package
536 # Plot MD (log ratios vs mean difference) using limma package 544 pdf(md_pdf[i])
537 pdf(mdOutPdf[i]) 545 limma::plotMD(res,
538 limma::plotMD(res, status=status, 546 status = status,
539 main=paste("MD Plot:", unmake.names(contrastData[i])), 547 main = paste("MD Plot:", unmake_names(contrast_data[i])),
540 hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1), 548 hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
541 xlab="Average Expression", ylab="logFC") 549 xlab = "Average Expression", ylab = "logFC"
542 550 )
543 abline(h=0, col="grey", lty=2) 551
544 552 abline(h = 0, col = "grey", lty = 2)
545 linkName <- paste0("MD Plot_", contrastData[i], ".pdf") 553
546 linkAddr <- paste0("mdplot_", contrastData[i], ".pdf") 554 link_name <- paste0("MD Plot_", contrast_data[i], ".pdf")
547 linkData <- rbind(linkData, c(linkName, linkAddr)) 555 link_addr <- paste0("mdplot_", contrast_data[i], ".pdf")
548 invisible(dev.off()) 556 link_data <- rbind(link_data, c(link_name, link_addr))
549 557 invisible(dev.off())
550 png(mdOutPng[i], height=600, width=600) 558
551 limma::plotMD(res, status=status, 559 png(md_png[i], height = 600, width = 600)
552 main=paste("MD Plot:", unmake.names(contrastData[i])), 560 limma::plotMD(res,
553 hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1), 561 status = status,
554 xlab="Average Expression", ylab="logFC") 562 main = paste("MD Plot:", unmake_names(contrast_data[i])),
555 563 hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
556 abline(h=0, col="grey", lty=2) 564 xlab = "Average Expression", ylab = "logFC"
557 565 )
558 imgName <- paste0("MD Plot_", contrastData[i], ".png") 566
559 imgAddr <- paste0("mdplot_", contrastData[i], ".png") 567 abline(h = 0, col = "grey", lty = 2)
560 imageData <- rbind(imageData, c(imgName, imgAddr)) 568
561 invisible(dev.off()) 569 img_name <- paste0("MD Plot_", contrast_data[i], ".png")
562 } 570 img_addr <- paste0("mdplot_", contrast_data[i], ".png")
563 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount) 571 image_data <- rbind(image_data, c(img_name, img_addr))
564 row.names(sigDiff) <- contrastData 572 invisible(dev.off())
573 }
574 sig_diff <- data.frame(Up = up_count, Flat = flat_count, Down = down_count)
575 row.names(sig_diff) <- contrast_data
565 576
566 # Save relevant items as rda object 577 # Save relevant items as rda object
567 if (wantRda) { 578 if (want_rda) {
568 if (wantNorm) { 579 if (want_norm) {
569 save(counts, data, status, normalisedCounts, labels, factors, fit, res, top, contrasts, design, 580 save(counts, data, status, normalised_counts, labels, factors, fit, res, top, contrasts, design,
570 file=rdaOut, ascii=TRUE) 581 file = rda_out, ascii = TRUE
571 } else { 582 )
572 save(counts, data, status, labels, factors, fit, res, top, contrasts, design, 583 } else {
573 file=rdaOut, ascii=TRUE) 584 save(counts, data, status, labels, factors, fit, res, top, contrasts, design,
574 } 585 file = rda_out, ascii = TRUE
575 linkData <- rbind(linkData, c("edgeR_analysis.RData", "edgeR_analysis.RData")) 586 )
587 }
588 link_data <- rbind(link_data, c("edgeR_analysis.RData", "edgeR_analysis.RData"))
576 } 589 }
577 590
578 # Record session info 591 # Record session info
579 writeLines(capture.output(sessionInfo()), sessionOut) 592 writeLines(capture.output(sessionInfo()), session_out)
580 linkData <- rbind(linkData, c("Session Info", "session_info.txt")) 593 link_data <- rbind(link_data, c("Session Info", "session_info.txt"))
581 594
582 # Record ending time and calculate total run time 595 # Record ending time and calculate total run time
583 timeEnd <- as.character(Sys.time()) 596 time_end <- as.character(Sys.time())
584 timeTaken <- capture.output(round(difftime(timeEnd, timeStart), digits=3)) 597 time_taken <- capture.output(round(difftime(time_end, time_start), digits = 3))
585 timeTaken <- gsub("Time difference of ", "", timeTaken, fixed=TRUE) 598 time_taken <- gsub("Time difference of ", "", time_taken, fixed = TRUE)
586 599
587 ################################################################################ 600 ################################################################################
588 ### HTML Generation 601 ### HTML Generation
589 ################################################################################ 602 ################################################################################
590 603
591 # Clear file 604 # Clear file
592 cat("", file=opt$htmlPath) 605 cat("", file = opt$htmlPath)
593 606
594 cata("<html>\n") 607 cata("<html>\n")
595 608
596 cata("<body>\n") 609 cata("<body>\n")
597 cata("<h3>edgeR Analysis Output:</h3>\n") 610 cata("<h3>edgeR Analysis Output:</h3>\n")
598 cata("Links to PDF copies of plots are in 'Plots' section below.<br />\n") 611 cata("Links to PDF copies of plots are in 'Plots' section below.<br />\n")
599 612
600 HtmlImage(imageData$Link[1], imageData$Label[1]) 613 html_image(image_data$Link[1], image_data$Label[1])
601 614
602 for (i in 2:nrow(imageData)) { 615 for (i in 2:nrow(image_data)) {
603 HtmlImage(imageData$Link[i], imageData$Label[i]) 616 html_image(image_data$Link[i], image_data$Label[i])
604 } 617 }
605 618
606 cata("<h4>Differential Expression Counts:</h4>\n") 619 cata("<h4>Differential Expression Counts:</h4>\n")
607 620
608 cata("<table border=\"1\" cellpadding=\"4\">\n") 621 cata("<table border=\"1\" cellpadding=\"4\">\n")
609 cata("<tr>\n") 622 cata("<tr>\n")
610 TableItem() 623 table_item()
611 for (i in colnames(sigDiff)) { 624 for (i in colnames(sig_diff)) {
612 TableHeadItem(i) 625 table_head_item(i)
613 } 626 }
614 cata("</tr>\n") 627 cata("</tr>\n")
615 for (i in 1:nrow(sigDiff)) { 628 for (i in seq_len(nrow(sig_diff))) {
616 cata("<tr>\n") 629 cata("<tr>\n")
617 TableHeadItem(unmake.names(row.names(sigDiff)[i])) 630 table_head_item(unmake_names(row.names(sig_diff)[i]))
618 for (j in 1:ncol(sigDiff)) { 631 for (j in seq_len(ncol(sig_diff))) {
619 TableItem(as.character(sigDiff[i, j])) 632 table_item(as.character(sig_diff[i, j]))
633 }
634 cata("</tr>\n")
635 }
636 cata("</table>")
637
638 cata("<h4>Plots:</h4>\n")
639 for (i in seq_len(nrow(link_data))) {
640 if (grepl(".pdf", link_data$Link[i])) {
641 html_link(link_data$Link[i], link_data$Label[i])
642 }
643 }
644
645 cata("<h4>Tables:</h4>\n")
646 for (i in seq_len(nrow(link_data))) {
647 if (grepl(".tsv", link_data$Link[i])) {
648 html_link(link_data$Link[i], link_data$Label[i])
649 }
650 }
651
652 if (want_rda) {
653 cata("<h4>R Data Objects:</h4>\n")
654 for (i in seq_len(nrow(link_data))) {
655 if (grepl(".RData", link_data$Link[i])) {
656 html_link(link_data$Link[i], link_data$Label[i])
620 } 657 }
621 cata("</tr>\n") 658 }
622 }
623 cata("</table>")
624
625 cata("<h4>Plots:</h4>\n")
626 for (i in 1:nrow(linkData)) {
627 if (grepl(".pdf", linkData$Link[i])) {
628 HtmlLink(linkData$Link[i], linkData$Label[i])
629 }
630 }
631
632 cata("<h4>Tables:</h4>\n")
633 for (i in 1:nrow(linkData)) {
634 if (grepl(".tsv", linkData$Link[i])) {
635 HtmlLink(linkData$Link[i], linkData$Label[i])
636 }
637 }
638
639 if (wantRda) {
640 cata("<h4>R Data Objects:</h4>\n")
641 for (i in 1:nrow(linkData)) {
642 if (grepl(".RData", linkData$Link[i])) {
643 HtmlLink(linkData$Link[i], linkData$Label[i])
644 }
645 }
646 } 659 }
647 660
648 cata("<p>Alt-click links to download file.</p>\n") 661 cata("<p>Alt-click links to download file.</p>\n")
649 cata("<p>Click floppy disc icon associated history item to download ") 662 cata("<p>Click floppy disc icon associated history item to download ")
650 cata("all files.</p>\n") 663 cata("all files.</p>\n")
651 cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n") 664 cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n")
652 665
653 cata("<h4>Additional Information</h4>\n") 666 cata("<h4>Additional Information</h4>\n")
654 cata("<ul>\n") 667 cata("<ul>\n")
655 668
656 if (filtCPM || filtSmpCount || filtTotCount) { 669 if (filt_cpm || filt_smpcount || filt_totcount) {
657 if (filtCPM) { 670 if (filt_cpm) {
658 tempStr <- paste("Genes without more than", opt$cpmReq, 671 temp_str <- paste(
659 "CPM in at least", opt$sampleReq, "samples are insignificant", 672 "Genes without more than", opt$cpmReq,
660 "and filtered out.") 673 "CPM in at least", opt$sampleReq, "samples are insignificant",
661 } else if (filtSmpCount) { 674 "and filtered out."
662 tempStr <- paste("Genes without more than", opt$cntReq, 675 )
663 "counts in at least", opt$sampleReq, "samples are insignificant", 676 } else if (filt_smpcount) {
664 "and filtered out.") 677 temp_str <- paste(
665 } else if (filtTotCount) { 678 "Genes without more than", opt$cntReq,
666 tempStr <- paste("Genes without more than", opt$cntReq, 679 "counts in at least", opt$sampleReq, "samples are insignificant",
667 "counts, after summing counts for all samples, are insignificant", 680 "and filtered out."
668 "and filtered out.") 681 )
669 } 682 } else if (filt_totcount) {
670 683 temp_str <- paste(
671 ListItem(tempStr) 684 "Genes without more than", opt$cntReq,
672 filterProp <- round(filteredCount/preFilterCount*100, digits=2) 685 "counts, after summing counts for all samples, are insignificant",
673 tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp, 686 "and filtered out."
674 "%) genes were filtered out for low expression.") 687 )
675 ListItem(tempStr) 688 }
676 } 689
677 ListItem(opt$normOpt, " was the method used to normalise library sizes.") 690 list_item(temp_str)
678 if (wantLRT) { 691 filter_prop <- round(filtered_count / prefilter_count * 100, digits = 2)
679 ListItem("The edgeR likelihood ratio test was used.") 692 temp_str <- paste0(
680 } else { 693 filtered_count, " of ", prefilter_count, " (", filter_prop,
681 if (wantRobust) { 694 "%) genes were filtered out for low expression."
682 ListItem("The edgeR quasi-likelihood test was used with robust settings (robust=TRUE with estimateDisp and glmQLFit).") 695 )
683 } else { 696 list_item(temp_str)
684 ListItem("The edgeR quasi-likelihood test was used.") 697 }
685 } 698 list_item(opt$normOpt, " was the method used to normalise library sizes.")
686 } 699 if (want_lrt) {
687 if (opt$pAdjOpt!="none") { 700 list_item("The edgeR likelihood ratio test was used.")
688 if (opt$pAdjOpt=="BH" || opt$pAdjOpt=="BY") { 701 } else {
689 tempStr <- paste0("MD-Plot highlighted genes are significant at FDR ", 702 if (want_robust) {
690 "of ", opt$pValReq," and exhibit log2-fold-change of at ", 703 list_item("The edgeR quasi-likelihood test was used with robust settings (robust=TRUE with estimateDisp and glmQLFit).")
691 "least ", opt$lfcReq, ".") 704 } else {
692 ListItem(tempStr) 705 list_item("The edgeR quasi-likelihood test was used.")
693 } else if (opt$pAdjOpt=="holm") { 706 }
694 tempStr <- paste0("MD-Plot highlighted genes are significant at adjusted ", 707 }
695 "p-value of ", opt$pValReq," by the Holm(1979) ", 708 if (opt$pAdjOpt != "none") {
696 "method, and exhibit log2-fold-change of at least ", 709 if (opt$pAdjOpt == "BH" || opt$pAdjOpt == "BY") {
697 opt$lfcReq, ".") 710 temp_str <- paste0(
698 ListItem(tempStr) 711 "MD-Plot highlighted genes are significant at FDR ",
699 } 712 "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
700 } else { 713 "least ", opt$lfcReq, "."
701 tempStr <- paste0("MD-Plot highlighted genes are significant at p-value ", 714 )
702 "of ", opt$pValReq," and exhibit log2-fold-change of at ", 715 list_item(temp_str)
703 "least ", opt$lfcReq, ".") 716 } else if (opt$pAdjOpt == "holm") {
704 ListItem(tempStr) 717 temp_str <- paste0(
718 "MD-Plot highlighted genes are significant at adjusted ",
719 "p-value of ", opt$pValReq, " by the Holm(1979) ",
720 "method, and exhibit log2-fold-change of at least ",
721 opt$lfcReq, "."
722 )
723 list_item(temp_str)
724 }
725 } else {
726 temp_str <- paste0(
727 "MD-Plot highlighted genes are significant at p-value ",
728 "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
729 "least ", opt$lfcReq, "."
730 )
731 list_item(temp_str)
705 } 732 }
706 cata("</ul>\n") 733 cata("</ul>\n")
707 734
708 cata("<h4>Summary of experimental data:</h4>\n") 735 cata("<h4>Summary of experimental data:</h4>\n")
709 736
710 cata("<p>*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP(S)*</p>\n") 737 cata("<p>*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP(S)*</p>\n")
711 738
712 cata("<table border=\"1\" cellpadding=\"3\">\n") 739 cata("<table border=\"1\" cellpadding=\"3\">\n")
713 cata("<tr>\n") 740 cata("<tr>\n")
714 TableHeadItem("SampleID") 741 table_head_item("SampleID")
715 TableHeadItem(names(factors)[1], " (Primary Factor)") 742 table_head_item(names(factors)[1], " (Primary Factor)")
716 743
717 if (ncol(factors) > 1) { 744 if (ncol(factors) > 1) {
718 for (i in names(factors)[2:length(names(factors))]) { 745 for (i in names(factors)[2:length(names(factors))]) {
719 TableHeadItem(i) 746 table_head_item(i)
720 } 747 }
721 cata("</tr>\n") 748 cata("</tr>\n")
722 } 749 }
723 750
724 for (i in 1:nrow(factors)) { 751 for (i in seq_len(nrow((factors)))) {
725 cata("<tr>\n") 752 cata("<tr>\n")
726 TableHeadItem(row.names(factors)[i]) 753 table_head_item(row.names(factors)[i])
727 for (j in 1:ncol(factors)) { 754 for (j in seq_len(ncol(factors))) {
728 TableItem(as.character(unmake.names(factors[i, j]))) 755 table_item(as.character(unmake_names(factors[i, j])))
729 } 756 }
730 cata("</tr>\n") 757 cata("</tr>\n")
731 } 758 }
732 cata("</table>") 759 cata("</table>")
733 760
734 for (i in 1:nrow(linkData)) { 761 for (i in seq_len(nrow(link_data))) {
735 if (grepl("session_info", linkData$Link[i])) { 762 if (grepl("session_info", link_data$Link[i])) {
736 HtmlLink(linkData$Link[i], linkData$Label[i]) 763 html_link(link_data$Link[i], link_data$Label[i])
737 } 764 }
738 } 765 }
739 766
740 cata("<table border=\"0\">\n") 767 cata("<table border=\"0\">\n")
741 cata("<tr>\n") 768 cata("<tr>\n")
742 TableItem("Task started at:"); TableItem(timeStart) 769 table_item("Task started at:")
770 table_item(time_start)
743 cata("</tr>\n") 771 cata("</tr>\n")
744 cata("<tr>\n") 772 cata("<tr>\n")
745 TableItem("Task ended at:"); TableItem(timeEnd) 773 table_item("Task ended at:")
774 table_item(time_end)
746 cata("</tr>\n") 775 cata("</tr>\n")
747 cata("<tr>\n") 776 cata("<tr>\n")
748 TableItem("Task run time:"); TableItem(timeTaken) 777 table_item("Task run time:")
778 table_item(time_taken)
749 cata("<tr>\n") 779 cata("<tr>\n")
750 cata("</table>\n") 780 cata("</table>\n")
751 781
752 cata("</body>\n") 782 cata("</body>\n")
753 cata("</html>") 783 cata("</html>")