Mercurial > repos > iuc > limma_voom
comparison limma_voom.R @ 3:38aab66ae5cb draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit 1640914b9812b0482a3cf684f05465f8d9cfdc65
author | iuc |
---|---|
date | Wed, 31 Jan 2018 12:45:42 -0500 |
parents | a330ddf43861 |
children | a61a6e62e91f |
comparison
equal
deleted
inserted
replaced
2:a330ddf43861 | 3:38aab66ae5cb |
---|---|
1 # This tool takes in a matrix of feature counts as well as gene annotations and | 1 # This tool takes in a matrix of feature counts as well as gene annotations and |
2 # outputs a table of top expressions as well as various plots for differential | 2 # outputs a table of top expressions as well as various plots for differential |
3 # expression analysis | 3 # expression analysis |
4 # | 4 # |
5 # ARGS: 1.countPath -Path to RData input containing counts | 5 # ARGS: htmlPath", "R", 1, "character" -Path to html file linking to other outputs |
6 # 2.annoPath -Path to input containing gene annotations | 6 # outPath", "o", 1, "character" -Path to folder to write all output to |
7 # 3.htmlPath -Path to html file linking to other outputs | 7 # filesPath", "j", 2, "character" -JSON list object if multiple files input |
8 # 4.outPath -Path to folder to write all output to | 8 # matrixPath", "m", 2, "character" -Path to count matrix |
9 # 5.rdaOpt -String specifying if RData should be saved | 9 # factFile", "f", 2, "character" -Path to factor information file |
10 # 6.normOpt -String specifying type of normalisation used | 10 # factInput", "i", 2, "character" -String containing factors if manually input |
11 # 7.weightOpt -String specifying usage of weights | 11 # annoPath", "a", 2, "character" -Path to input containing gene annotations |
12 # 8.contrastData -String containing contrasts of interest | 12 # contrastData", "C", 1, "character" -String containing contrasts of interest |
13 # 9.cpmReq -Float specifying cpm requirement | 13 # cpmReq", "c", 2, "double" -Float specifying cpm requirement |
14 # 10.sampleReq -Integer specifying cpm requirement | 14 # cntReq", "z", 2, "integer" -Integer specifying minimum total count requirement |
15 # 11.pAdjOpt -String specifying the p-value adjustment method | 15 # sampleReq", "s", 2, "integer" -Integer specifying cpm requirement |
16 # 12.pValReq -Float specifying the p-value requirement | 16 # normCounts", "x", 0, "logical" -String specifying if normalised counts should be output |
17 # 13.lfcReq -Float specifying the log-fold-change requirement | 17 # rdaOpt", "r", 0, "logical" -String specifying if RData should be output |
18 # 14.normCounts -String specifying if normalised counts should be output | 18 # lfcReq", "l", 1, "double" -Float specifying the log-fold-change requirement |
19 # 15.factPath -Path to factor information file | 19 # pValReq", "p", 1, "double" -Float specifying the p-value requirement |
20 # 16.factorData -Strings containing factor names and values if manually input | 20 # pAdjOpt", "d", 1, "character" -String specifying the p-value adjustment method |
21 # normOpt", "n", 1, "character" -String specifying type of normalisation used | |
22 # robOpt", "b", 0, "logical" -String specifying if robust options should be used | |
23 # trend", "t", 1, "double" -Float for prior.count if limma-trend is used instead of voom | |
24 # weightOpt", "w", 0, "logical" -String specifying if voomWithQualityWeights should be used | |
21 # | 25 # |
22 # OUT: Voom Plot | 26 # OUT: |
23 # BCV Plot | 27 # MDS Plot |
24 # MA Plot | 28 # Voom/SA plot |
29 # MD Plot | |
25 # Expression Table | 30 # Expression Table |
26 # HTML file linking to the ouputs | 31 # HTML file linking to the ouputs |
32 # Optional: | |
33 # Normalised counts Table | |
34 # RData file | |
35 # | |
27 # | 36 # |
28 # Author: Shian Su - registertonysu@gmail.com - Jan 2014 | 37 # Author: Shian Su - registertonysu@gmail.com - Jan 2014 |
29 # Modified by: Maria Doyle - Jun 2017 | 38 # Modified by: Maria Doyle - Jun 2017, Jan 2018 |
30 | 39 |
31 # Record starting time | 40 # Record starting time |
32 timeStart <- as.character(Sys.time()) | 41 timeStart <- as.character(Sys.time()) |
33 | 42 |
34 # Load all required libraries | 43 # Load all required libraries |
36 library(statmod, quietly=TRUE, warn.conflicts=FALSE) | 45 library(statmod, quietly=TRUE, warn.conflicts=FALSE) |
37 library(splines, quietly=TRUE, warn.conflicts=FALSE) | 46 library(splines, quietly=TRUE, warn.conflicts=FALSE) |
38 library(edgeR, quietly=TRUE, warn.conflicts=FALSE) | 47 library(edgeR, quietly=TRUE, warn.conflicts=FALSE) |
39 library(limma, quietly=TRUE, warn.conflicts=FALSE) | 48 library(limma, quietly=TRUE, warn.conflicts=FALSE) |
40 library(scales, quietly=TRUE, warn.conflicts=FALSE) | 49 library(scales, quietly=TRUE, warn.conflicts=FALSE) |
50 library(getopt, quietly=TRUE, warn.conflicts=FALSE) | |
41 | 51 |
42 if (packageVersion("limma") < "3.20.1") { | 52 if (packageVersion("limma") < "3.20.1") { |
43 stop("Please update 'limma' to version >= 3.20.1 to run this tool") | 53 stop("Please update 'limma' to version >= 3.20.1 to run this tool") |
44 } | 54 } |
45 | 55 |
46 ################################################################################ | 56 ################################################################################ |
47 ### Function Delcaration | 57 ### Function Delcaration |
48 ################################################################################ | 58 ################################################################################ |
49 # Function to sanitise contrast equations so there are no whitespaces | 59 # Function to sanitise contrast equations so there are no whitespaces |
50 # surrounding the arithmetic operators, leading or trailing whitespace | 60 # surrounding the arithmetic operators, leading or trailing whitespace |
51 sanitiseEquation <- function(equation) { | 61 sanitiseEquation <- function(equation) { |
52 equation <- gsub(" *[+] *", "+", equation) | 62 equation <- gsub(" *[+] *", "+", equation) |
53 equation <- gsub(" *[-] *", "-", equation) | 63 equation <- gsub(" *[-] *", "-", equation) |
54 equation <- gsub(" *[/] *", "/", equation) | 64 equation <- gsub(" *[/] *", "/", equation) |
55 equation <- gsub(" *[*] *", "*", equation) | 65 equation <- gsub(" *[*] *", "*", equation) |
56 equation <- gsub("^\\s+|\\s+$", "", equation) | 66 equation <- gsub("^\\s+|\\s+$", "", equation) |
57 return(equation) | 67 return(equation) |
58 } | 68 } |
59 | 69 |
60 # Function to sanitise group information | 70 # Function to sanitise group information |
61 sanitiseGroups <- function(string) { | 71 sanitiseGroups <- function(string) { |
62 string <- gsub(" *[,] *", ",", string) | 72 string <- gsub(" *[,] *", ",", string) |
63 string <- gsub("^\\s+|\\s+$", "", string) | 73 string <- gsub("^\\s+|\\s+$", "", string) |
64 return(string) | 74 return(string) |
65 } | 75 } |
66 | 76 |
67 # Function to change periods to whitespace in a string | 77 # Function to change periods to whitespace in a string |
68 unmake.names <- function(string) { | 78 unmake.names <- function(string) { |
69 string <- gsub(".", " ", string, fixed=TRUE) | 79 string <- gsub(".", " ", string, fixed=TRUE) |
70 return(string) | 80 return(string) |
71 } | 81 } |
72 | 82 |
73 # Generate output folder and paths | 83 # Generate output folder and paths |
74 makeOut <- function(filename) { | 84 makeOut <- function(filename) { |
75 return(paste0(outPath, "/", filename)) | 85 return(paste0(opt$outPath, "/", filename)) |
76 } | 86 } |
77 | 87 |
78 # Generating design information | 88 # Generating design information |
79 pasteListName <- function(string) { | 89 pasteListName <- function(string) { |
80 return(paste0("factors$", string)) | 90 return(paste0("factors$", string)) |
81 } | 91 } |
82 | 92 |
83 # Create cata function: default path set, default seperator empty and appending | 93 # Create cata function: default path set, default seperator empty and appending |
84 # true by default (Ripped straight from the cat function with altered argument | 94 # true by default (Ripped straight from the cat function with altered argument |
85 # defaults) | 95 # defaults) |
86 cata <- function(..., file = htmlPath, sep = "", fill = FALSE, labels = NULL, | 96 cata <- function(..., file = opt$htmlPath, sep = "", fill = FALSE, labels = NULL, |
87 append = TRUE) { | 97 append = TRUE) { |
88 if (is.character(file)) | 98 if (is.character(file)) |
89 if (file == "") | 99 if (file == "") |
90 file <- stdout() | 100 file <- stdout() |
91 else if (substring(file, 1L, 1L) == "|") { | 101 else if (substring(file, 1L, 1L) == "|") { |
92 file <- pipe(substring(file, 2L), "w") | 102 file <- pipe(substring(file, 2L), "w") |
93 on.exit(close(file)) | 103 on.exit(close(file)) |
94 } | 104 } |
95 else { | 105 else { |
96 file <- file(file, ifelse(append, "a", "w")) | 106 file <- file(file, ifelse(append, "a", "w")) |
97 on.exit(close(file)) | 107 on.exit(close(file)) |
98 } | 108 } |
99 .Internal(cat(list(...), file, sep, fill, labels, append)) | 109 .Internal(cat(list(...), file, sep, fill, labels, append)) |
100 } | 110 } |
101 | 111 |
102 # Function to write code for html head and title | 112 # Function to write code for html head and title |
103 HtmlHead <- function(title) { | 113 HtmlHead <- function(title) { |
104 cata("<head>\n") | 114 cata("<head>\n") |
105 cata("<title>", title, "</title>\n") | 115 cata("<title>", title, "</title>\n") |
106 cata("</head>\n") | 116 cata("</head>\n") |
107 } | 117 } |
108 | 118 |
109 # Function to write code for html links | 119 # Function to write code for html links |
110 HtmlLink <- function(address, label=address) { | 120 HtmlLink <- function(address, label=address) { |
111 cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n") | 121 cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n") |
112 } | 122 } |
113 | 123 |
114 # Function to write code for html images | 124 # Function to write code for html images |
115 HtmlImage <- function(source, label=source, height=600, width=600) { | 125 HtmlImage <- function(source, label=source, height=600, width=600) { |
116 cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height) | 126 cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height) |
117 cata("\" width=\"", width, "\"/>\n") | 127 cata("\" width=\"", width, "\"/>\n") |
118 } | 128 } |
119 | 129 |
120 # Function to write code for html list items | 130 # Function to write code for html list items |
121 ListItem <- function(...) { | 131 ListItem <- function(...) { |
122 cata("<li>", ..., "</li>\n") | 132 cata("<li>", ..., "</li>\n") |
123 } | 133 } |
124 | 134 |
125 TableItem <- function(...) { | 135 TableItem <- function(...) { |
126 cata("<td>", ..., "</td>\n") | 136 cata("<td>", ..., "</td>\n") |
127 } | 137 } |
128 | 138 |
129 TableHeadItem <- function(...) { | 139 TableHeadItem <- function(...) { |
130 cata("<th>", ..., "</th>\n") | 140 cata("<th>", ..., "</th>\n") |
131 } | 141 } |
132 | 142 |
133 ################################################################################ | 143 ################################################################################ |
134 ### Input Processing | 144 ### Input Processing |
135 ################################################################################ | 145 ################################################################################ |
136 | 146 |
137 # Collects arguments from command line | 147 # Collect arguments from command line |
138 argv <- commandArgs(TRUE) | 148 args <- commandArgs(trailingOnly=TRUE) |
139 | 149 |
140 # Grab arguments | 150 # Get options, using the spec as defined by the enclosed list. |
141 countPath <- as.character(argv[1]) | 151 # Read the options from the default: commandArgs(TRUE). |
142 annoPath <- as.character(argv[2]) | 152 spec <- matrix(c( |
143 htmlPath <- as.character(argv[3]) | 153 "htmlPath", "R", 1, "character", |
144 outPath <- as.character(argv[4]) | 154 "outPath", "o", 1, "character", |
145 rdaOpt <- as.character(argv[5]) | 155 "filesPath", "j", 2, "character", |
146 normOpt <- as.character(argv[6]) | 156 "matrixPath", "m", 2, "character", |
147 weightOpt <- as.character(argv[7]) | 157 "factFile", "f", 2, "character", |
148 contrastData <- as.character(argv[8]) | 158 "factInput", "i", 2, "character", |
149 cpmReq <- as.numeric(argv[9]) | 159 "annoPath", "a", 2, "character", |
150 sampleReq <- as.numeric(argv[10]) | 160 "contrastData", "C", 1, "character", |
151 pAdjOpt <- as.character(argv[11]) | 161 "cpmReq", "c", 1, "double", |
152 pValReq <- as.numeric(argv[12]) | 162 "totReq", "y", 0, "logical", |
153 lfcReq <- as.numeric(argv[13]) | 163 "cntReq", "z", 1, "integer", |
154 normCounts <- as.character(argv[14]) | 164 "sampleReq", "s", 1, "integer", |
155 factPath <- as.character(argv[15]) | 165 "normCounts", "x", 0, "logical", |
156 # Process factors | 166 "rdaOpt", "r", 0, "logical", |
157 if (as.character(argv[16])=="None") { | 167 "lfcReq", "l", 1, "double", |
158 factorData <- read.table(factPath, header=TRUE, sep="\t") | 168 "pValReq", "p", 1, "double", |
159 factors <- factorData[,-1, drop=FALSE] | 169 "pAdjOpt", "d", 1, "character", |
160 } else { | 170 "normOpt", "n", 1, "character", |
161 factorData <- list() | 171 "robOpt", "b", 0, "logical", |
162 for (i in 16:length(argv)) { | 172 "trend", "t", 1, "double", |
163 newFact <- unlist(strsplit(as.character(argv[i]), split="::")) | 173 "weightOpt", "w", 0, "logical"), |
164 factorData <- rbind(factorData, newFact) | 174 byrow=TRUE, ncol=4) |
165 } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor. | 175 opt <- getopt(spec) |
166 | 176 |
167 # Set the row names to be the name of the factor and delete first row | 177 |
168 row.names(factorData) <- factorData[, 1] | 178 if (is.null(opt$matrixPath) & is.null(opt$filesPath)) { |
169 factorData <- factorData[, -1] | 179 cat("A counts matrix (or a set of counts files) is required.\n") |
170 factorData <- sapply(factorData, sanitiseGroups) | 180 q(status=1) |
171 factorData <- sapply(factorData, strsplit, split=",") | 181 } |
172 factorData <- sapply(factorData, make.names) | 182 |
173 # Transform factor data into data frame of R factor objects | 183 if (is.null(opt$cpmReq)) { |
174 factors <- data.frame(factorData) | 184 filtCPM <- FALSE |
175 | 185 } else { |
176 } | 186 filtCPM <- TRUE |
177 | 187 } |
178 # Process other arguments | 188 |
179 if (weightOpt=="yes") { | 189 if (is.null(opt$cntReq) || is.null(opt$sampleReq)) { |
180 wantWeight <- TRUE | 190 filtSmpCount <- FALSE |
181 } else { | 191 } else { |
182 wantWeight <- FALSE | 192 filtSmpCount <- TRUE |
183 } | 193 } |
184 | 194 |
185 if (rdaOpt=="yes") { | 195 if (is.null(opt$totReq)) { |
186 wantRda <- TRUE | 196 filtTotCount <- FALSE |
187 } else { | 197 } else { |
188 wantRda <- FALSE | 198 filtTotCount <- TRUE |
189 } | 199 } |
190 | 200 |
191 if (annoPath=="None") { | 201 if (is.null(opt$rdaOpt)) { |
192 haveAnno <- FALSE | 202 wantRda <- FALSE |
193 } else { | 203 } else { |
194 haveAnno <- TRUE | 204 wantRda <- TRUE |
195 } | 205 } |
196 | 206 |
197 if (normCounts=="yes") { | 207 if (is.null(opt$annoPath)) { |
198 wantNorm <- TRUE | 208 haveAnno <- FALSE |
199 } else { | 209 } else { |
200 wantNorm <- FALSE | 210 haveAnno <- TRUE |
201 } | 211 } |
202 | 212 |
213 if (is.null(opt$normCounts)) { | |
214 wantNorm <- FALSE | |
215 } else { | |
216 wantNorm <- TRUE | |
217 } | |
218 | |
219 if (is.null(opt$robOpt)) { | |
220 wantRobust <- FALSE | |
221 } else { | |
222 wantRobust <- TRUE | |
223 } | |
224 | |
225 if (is.null(opt$weightOpt)) { | |
226 wantWeight <- FALSE | |
227 } else { | |
228 wantWeight <- TRUE | |
229 } | |
230 | |
231 if (is.null(opt$trend)) { | |
232 wantTrend <- FALSE | |
233 deMethod <- "limma-voom" | |
234 } else { | |
235 wantTrend <- TRUE | |
236 deMethod <- "limma-trend" | |
237 priorCount <- opt$trend | |
238 } | |
239 | |
240 | |
241 if (!is.null(opt$filesPath)) { | |
242 # Process the separate count files (adapted from DESeq2 wrapper) | |
243 library("rjson") | |
244 parser <- newJSONParser() | |
245 parser$addData(opt$filesPath) | |
246 factorList <- parser$getObject() | |
247 factors <- sapply(factorList, function(x) x[[1]]) | |
248 filenamesIn <- unname(unlist(factorList[[1]][[2]])) | |
249 sampleTable <- data.frame(sample=basename(filenamesIn), | |
250 filename=filenamesIn, | |
251 row.names=filenamesIn, | |
252 stringsAsFactors=FALSE) | |
253 for (factor in factorList) { | |
254 factorName <- factor[[1]] | |
255 sampleTable[[factorName]] <- character(nrow(sampleTable)) | |
256 lvls <- sapply(factor[[2]], function(x) names(x)) | |
257 for (i in seq_along(factor[[2]])) { | |
258 files <- factor[[2]][[i]][[1]] | |
259 sampleTable[files,factorName] <- lvls[i] | |
260 } | |
261 sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls) | |
262 } | |
263 rownames(sampleTable) <- sampleTable$sample | |
264 rem <- c("sample","filename") | |
265 factors <- sampleTable[, !(names(sampleTable) %in% rem), drop=FALSE] | |
266 | |
267 #read in count files and create single table | |
268 countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)}) | |
269 counts <- do.call("cbind", countfiles) | |
270 | |
271 } else { | |
272 # Process the single count matrix | |
273 counts <- read.table(opt$matrixPath, header=TRUE, sep="\t", stringsAsFactors=FALSE) | |
274 row.names(counts) <- counts[, 1] | |
275 counts <- counts[ , -1] | |
276 countsRows <- nrow(counts) | |
277 | |
278 # Process factors | |
279 if (is.null(opt$factInput)) { | |
280 factorData <- read.table(opt$factFile, header=TRUE, sep="\t") | |
281 factors <- factorData[, -1, drop=FALSE] | |
282 } else { | |
283 factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE)) | |
284 factorData <- list() | |
285 for (fact in factors) { | |
286 newFact <- unlist(strsplit(fact, split="::")) | |
287 factorData <- rbind(factorData, newFact) | |
288 } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor. | |
289 | |
290 # Set the row names to be the name of the factor and delete first row | |
291 row.names(factorData) <- factorData[, 1] | |
292 factorData <- factorData[, -1] | |
293 factorData <- sapply(factorData, sanitiseGroups) | |
294 factorData <- sapply(factorData, strsplit, split=",") | |
295 factorData <- sapply(factorData, make.names) | |
296 # Transform factor data into data frame of R factor objects | |
297 factors <- data.frame(factorData) | |
298 } | |
299 } | |
300 | |
301 # if annotation file provided | |
302 if (haveAnno) { | |
303 geneanno <- read.table(opt$annoPath, header=TRUE, sep="\t", stringsAsFactors=FALSE) | |
304 } | |
203 | 305 |
204 #Create output directory | 306 #Create output directory |
205 dir.create(outPath, showWarnings=FALSE) | 307 dir.create(opt$outPath, showWarnings=FALSE) |
206 | 308 |
207 # Split up contrasts seperated by comma into a vector then sanitise | 309 # Split up contrasts seperated by comma into a vector then sanitise |
208 contrastData <- unlist(strsplit(contrastData, split=",")) | 310 contrastData <- unlist(strsplit(opt$contrastData, split=",")) |
209 contrastData <- sanitiseEquation(contrastData) | 311 contrastData <- sanitiseEquation(contrastData) |
210 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE) | 312 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE) |
211 | 313 |
212 bcvOutPdf <- makeOut("bcvplot.pdf") | 314 |
213 bcvOutPng <- makeOut("bcvplot.png") | 315 mdsOutPdf <- makeOut("mdsplot_nonorm.pdf") |
214 mdsOutPdf <- makeOut("mdsplot.pdf") | 316 mdsOutPng <- makeOut("mdsplot_nonorm.png") |
215 mdsOutPng <- makeOut("mdsplot.png") | 317 nmdsOutPdf <- makeOut("mdsplot.pdf") |
216 voomOutPdf <- makeOut("voomplot.pdf") | 318 nmdsOutPng <- makeOut("mdsplot.png") |
217 voomOutPng <- makeOut("voomplot.png") | |
218 maOutPdf <- character() # Initialise character vector | 319 maOutPdf <- character() # Initialise character vector |
219 maOutPng <- character() | 320 maOutPng <- character() |
220 topOut <- character() | 321 topOut <- character() |
221 for (i in 1:length(contrastData)) { | 322 for (i in 1:length(contrastData)) { |
222 maOutPdf[i] <- makeOut(paste0("maplot_", contrastData[i], ".pdf")) | 323 maOutPdf[i] <- makeOut(paste0("maplot_", contrastData[i], ".pdf")) |
223 maOutPng[i] <- makeOut(paste0("maplot_", contrastData[i], ".png")) | 324 maOutPng[i] <- makeOut(paste0("maplot_", contrastData[i], ".png")) |
224 topOut[i] <- makeOut(paste0("limma-voom_", contrastData[i], ".tsv")) | 325 topOut[i] <- makeOut(paste0(deMethod, "_", contrastData[i], ".tsv")) |
225 } # Save output paths for each contrast as vectors | 326 } |
226 normOut <- makeOut("limma-voom_normcounts.tsv") | 327 normOut <- makeOut(paste0(deMethod, "_normcounts.tsv")) |
227 rdaOut <- makeOut("RData.rda") | 328 rdaOut <- makeOut(paste0(deMethod, "_analysis.RData")) |
228 sessionOut <- makeOut("session_info.txt") | 329 sessionOut <- makeOut("session_info.txt") |
229 | 330 |
230 # Initialise data for html links and images, data frame with columns Label and | 331 # Initialise data for html links and images, data frame with columns Label and |
231 # Link | 332 # Link |
232 linkData <- data.frame(Label=character(), Link=character(), | 333 linkData <- data.frame(Label=character(), Link=character(), |
233 stringsAsFactors=FALSE) | 334 stringsAsFactors=FALSE) |
234 imageData <- data.frame(Label=character(), Link=character(), | 335 imageData <- data.frame(Label=character(), Link=character(), |
235 stringsAsFactors=FALSE) | 336 stringsAsFactors=FALSE) |
236 | 337 |
237 # Initialise vectors for storage of up/down/neutral regulated counts | 338 # Initialise vectors for storage of up/down/neutral regulated counts |
238 upCount <- numeric() | 339 upCount <- numeric() |
239 downCount <- numeric() | 340 downCount <- numeric() |
240 flatCount <- numeric() | 341 flatCount <- numeric() |
241 | |
242 # Read in counts and geneanno data | |
243 counts <- read.table(countPath, header=TRUE, sep="\t", stringsAsFactors=FALSE) | |
244 row.names(counts) <- counts[, 1] | |
245 counts <- counts[ , -1] | |
246 countsRows <- nrow(counts) | |
247 if (haveAnno) { | |
248 geneanno <- read.table(annoPath, header=TRUE, sep="\t", stringsAsFactors=FALSE) | |
249 } | |
250 | 342 |
251 ################################################################################ | 343 ################################################################################ |
252 ### Data Processing | 344 ### Data Processing |
253 ################################################################################ | 345 ################################################################################ |
254 | 346 |
255 # Extract counts and annotation data | 347 # Extract counts and annotation data |
348 print("Extracting counts") | |
256 data <- list() | 349 data <- list() |
257 data$counts <- counts | 350 data$counts <- counts |
258 if (haveAnno) { | 351 if (haveAnno) { |
259 data$genes <- geneanno | 352 data$genes <- geneanno |
260 } else { | 353 } else { |
261 data$genes <- data.frame(GeneID=row.names(counts)) | 354 data$genes <- data.frame(GeneID=row.names(counts)) |
262 } | 355 } |
263 | 356 |
264 # Filter out genes that do not have a required cpm in a required number of | 357 # If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of |
265 # samples | 358 # samples. Default is no filtering |
266 preFilterCount <- nrow(data$counts) | 359 preFilterCount <- nrow(data$counts) |
267 sel <- rowSums(cpm(data$counts) > cpmReq) >= sampleReq | 360 |
268 data$counts <- data$counts[sel, ] | 361 if (filtCPM || filtSmpCount || filtTotCount) { |
269 data$genes <- data$genes[sel, ,drop = FALSE] | 362 |
363 if (filtTotCount) { | |
364 keep <- rowSums(data$counts) >= opt$cntReq | |
365 } else if (filtSmpCount) { | |
366 keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq | |
367 } else if (filtCPM) { | |
368 keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq | |
369 } | |
370 | |
371 data$counts <- data$counts[keep, ] | |
372 data$genes <- data$genes[keep, , drop=FALSE] | |
373 } | |
374 | |
270 postFilterCount <- nrow(data$counts) | 375 postFilterCount <- nrow(data$counts) |
271 filteredCount <- preFilterCount-postFilterCount | 376 filteredCount <- preFilterCount-postFilterCount |
272 | 377 |
273 # Creating naming data | 378 # Creating naming data |
274 samplenames <- colnames(data$counts) | 379 samplenames <- colnames(data$counts) |
275 sampleanno <- data.frame("sampleID"=samplenames, factors) | 380 sampleanno <- data.frame("sampleID"=samplenames, factors) |
276 | 381 |
277 | 382 |
278 # Generating the DGEList object "data" | 383 # Generating the DGEList object "data" |
384 print("Generating DGEList object") | |
279 data$samples <- sampleanno | 385 data$samples <- sampleanno |
280 data$samples$lib.size <- colSums(data$counts) | 386 data$samples$lib.size <- colSums(data$counts) |
281 data$samples$norm.factors <- 1 | 387 data$samples$norm.factors <- 1 |
282 row.names(data$samples) <- colnames(data$counts) | 388 row.names(data$samples) <- colnames(data$counts) |
283 data <- new("DGEList", data) | 389 data <- new("DGEList", data) |
284 | 390 |
391 print("Generating Design") | |
392 # Name rows of factors according to their sample | |
393 row.names(factors) <- names(data$counts) | |
285 factorList <- sapply(names(factors), pasteListName) | 394 factorList <- sapply(names(factors), pasteListName) |
286 | 395 formula <- "~0" |
287 formula <- "~0" | |
288 for (i in 1:length(factorList)) { | 396 for (i in 1:length(factorList)) { |
289 formula <- paste(formula,factorList[i], sep="+") | 397 formula <- paste(formula,factorList[i], sep="+") |
290 } | 398 } |
291 | |
292 formula <- formula(formula) | 399 formula <- formula(formula) |
293 design <- model.matrix(formula) | 400 design <- model.matrix(formula) |
294 | |
295 for (i in 1:length(factorList)) { | 401 for (i in 1:length(factorList)) { |
296 colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE) | 402 colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE) |
297 } | 403 } |
298 | 404 |
299 # Calculating normalising factor, estimating dispersion | 405 # Calculating normalising factors |
300 data <- calcNormFactors(data, method=normOpt) | 406 print("Calculating Normalisation Factors") |
301 #data <- estimateDisp(data, design=design, robust=TRUE) | 407 data <- calcNormFactors(data, method=opt$normOpt) |
302 | 408 |
303 # Generate contrasts information | 409 # Generate contrasts information |
410 print("Generating Contrasts") | |
304 contrasts <- makeContrasts(contrasts=contrastData, levels=design) | 411 contrasts <- makeContrasts(contrasts=contrastData, levels=design) |
305 | 412 |
306 # Name rows of factors according to their sample | |
307 row.names(factors) <- names(data$counts) | |
308 | |
309 ################################################################################ | 413 ################################################################################ |
310 ### Data Output | 414 ### Data Output |
311 ################################################################################ | 415 ################################################################################ |
312 | |
313 # BCV Plot | |
314 #png(bcvOutPng, width=600, height=600) | |
315 #plotBCV(data, main="BCV Plot") | |
316 #imageData[1, ] <- c("BCV Plot", "bcvplot.png") | |
317 #invisible(dev.off()) | |
318 | |
319 #pdf(bcvOutPdf) | |
320 #plotBCV(data, main="BCV Plot") | |
321 #invisible(dev.off()) | |
322 | |
323 if (wantWeight) { | |
324 # Creating voom data object and plot | |
325 png(voomOutPng, width=1000, height=600) | |
326 vData <- voomWithQualityWeights(data, design=design, plot=TRUE) | |
327 imageData[1, ] <- c("Voom Plot", "voomplot.png") | |
328 invisible(dev.off()) | |
329 | |
330 pdf(voomOutPdf, width=14) | |
331 vData <- voomWithQualityWeights(data, design=design, plot=TRUE) | |
332 linkData[1, ] <- c("Voom Plot (.pdf)", "voomplot.pdf") | |
333 invisible(dev.off()) | |
334 | |
335 # Generating fit data and top table with weights | |
336 wts <- vData$weights | |
337 voomFit <- lmFit(vData, design, weights=wts) | |
338 | |
339 } else { | |
340 # Creating voom data object and plot | |
341 png(voomOutPng, width=600, height=600) | |
342 vData <- voom(data, design=design, plot=TRUE) | |
343 imageData[1, ] <- c("Voom Plot", "voomplot.png") | |
344 invisible(dev.off()) | |
345 | |
346 pdf(voomOutPdf) | |
347 vData <- voom(data, design=design, plot=TRUE) | |
348 linkData[1, ] <- c("Voom Plot (.pdf)", "voomplot.pdf") | |
349 invisible(dev.off()) | |
350 | |
351 # Generate voom fit | |
352 voomFit <- lmFit(vData, design) | |
353 | |
354 } | |
355 | |
356 # Save normalised counts (log2cpm) | |
357 if (wantNorm) { | |
358 norm_counts <- data.frame(vData$genes, vData$E) | |
359 write.table (norm_counts, file=normOut, row.names=FALSE, sep="\t") | |
360 linkData <- rbind(linkData, c("limma-voom_normcounts.tsv", "limma-voom_normcounts.tsv")) | |
361 } | |
362 | |
363 # Fit linear model and estimate dispersion with eBayes | |
364 voomFit <- contrasts.fit(voomFit, contrasts) | |
365 voomFit <- eBayes(voomFit) | |
366 | |
367 # Plot MDS | 416 # Plot MDS |
417 print("Generating MDS plot") | |
368 labels <- names(counts) | 418 labels <- names(counts) |
369 png(mdsOutPng, width=600, height=600) | 419 png(mdsOutPng, width=600, height=600) |
370 # Currently only using a single factor | 420 # Currently only using a single factor |
371 plotMDS(vData, labels=labels, col=as.numeric(factors[, 1]), cex=0.8) | 421 plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot (unnormalised)") |
372 imgName <- "Voom Plot" | 422 imageData[1, ] <- c("MDS Plot (unnormalised)", "mdsplot_nonorm.png") |
423 invisible(dev.off()) | |
424 | |
425 pdf(mdsOutPdf) | |
426 plotMDS(data, labels=labels, cex=0.5) | |
427 linkData[1, ] <- c("MDS Plot (unnormalised).pdf", "mdsplot_nonorm.pdf") | |
428 invisible(dev.off()) | |
429 | |
430 if (wantTrend) { | |
431 # limma-trend approach | |
432 logCPM <- cpm(data, log=TRUE, prior.count=opt$trend) | |
433 fit <- lmFit(logCPM, design) | |
434 fit <- contrasts.fit(fit, contrasts) | |
435 if (wantRobust) { | |
436 fit <- eBayes(fit, trend=TRUE, robust=TRUE) | |
437 } else { | |
438 fit <- eBayes(fit, trend=TRUE, robust=FALSE) | |
439 } | |
440 # plot fit with plotSA | |
441 saOutPng <- makeOut("saplot.png") | |
442 saOutPdf <- makeOut("saplot.pdf") | |
443 | |
444 png(saOutPng, width=600, height=600) | |
445 plotSA(fit, main="SA Plot") | |
446 imgName <- "SA Plot.png" | |
447 imgAddr <- "saplot.png" | |
448 imageData <- rbind(imageData, c(imgName, imgAddr)) | |
449 invisible(dev.off()) | |
450 | |
451 pdf(saOutPdf, width=14) | |
452 plotSA(fit, main="SA Plot") | |
453 linkName <- paste0("SA Plot.pdf") | |
454 linkAddr <- paste0("saplot.pdf") | |
455 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
456 invisible(dev.off()) | |
457 | |
458 plotData <- logCPM | |
459 | |
460 # Save normalised counts (log2cpm) | |
461 if (wantNorm) { | |
462 write.table(logCPM, file=normOut, row.names=TRUE, sep="\t") | |
463 linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts.tsv")))) | |
464 } | |
465 } else { | |
466 # limma-voom approach | |
467 voomOutPdf <- makeOut("voomplot.pdf") | |
468 voomOutPng <- makeOut("voomplot.png") | |
469 | |
470 if (wantWeight) { | |
471 # Creating voom data object and plot | |
472 png(voomOutPng, width=1000, height=600) | |
473 vData <- voomWithQualityWeights(data, design=design, plot=TRUE) | |
474 imgName <- "Voom Plot.png" | |
475 imgAddr <- "voomplot.png" | |
476 imageData <- rbind(imageData, c(imgName, imgAddr)) | |
477 invisible(dev.off()) | |
478 | |
479 pdf(voomOutPdf, width=14) | |
480 vData <- voomWithQualityWeights(data, design=design, plot=TRUE) | |
481 linkName <- paste0("Voom Plot.pdf") | |
482 linkAddr <- paste0("voomplot.pdf") | |
483 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
484 invisible(dev.off()) | |
485 | |
486 # Generating fit data and top table with weights | |
487 wts <- vData$weights | |
488 voomFit <- lmFit(vData, design, weights=wts) | |
489 | |
490 } else { | |
491 # Creating voom data object and plot | |
492 png(voomOutPng, width=600, height=600) | |
493 vData <- voom(data, design=design, plot=TRUE) | |
494 imgName <- "Voom Plot" | |
495 imgAddr <- "voomplot.png" | |
496 imageData <- rbind(imageData, c(imgName, imgAddr)) | |
497 invisible(dev.off()) | |
498 | |
499 pdf(voomOutPdf) | |
500 vData <- voom(data, design=design, plot=TRUE) | |
501 linkName <- paste0("Voom Plot.pdf") | |
502 linkAddr <- paste0("voomplot.pdf") | |
503 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
504 invisible(dev.off()) | |
505 | |
506 # Generate voom fit | |
507 voomFit <- lmFit(vData, design) | |
508 } | |
509 | |
510 # Save normalised counts (log2cpm) | |
511 if (wantNorm) { | |
512 norm_counts <- data.frame(vData$genes, vData$E) | |
513 write.table(norm_counts, file=normOut, row.names=FALSE, sep="\t") | |
514 linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts.tsv")))) | |
515 } | |
516 | |
517 # Fit linear model and estimate dispersion with eBayes | |
518 voomFit <- contrasts.fit(voomFit, contrasts) | |
519 if (wantRobust) { | |
520 fit <- eBayes(voomFit, robust=TRUE) | |
521 } else { | |
522 fit <- eBayes(voomFit, robust=FALSE) | |
523 } | |
524 plotData <- vData | |
525 } | |
526 | |
527 print("Generating normalised MDS plot") | |
528 png(nmdsOutPng, width=600, height=600) | |
529 # Currently only using a single factor | |
530 plotMDS(plotData, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot (normalised)") | |
531 imgName <- "MDS Plot (normalised)" | |
373 imgAddr <- "mdsplot.png" | 532 imgAddr <- "mdsplot.png" |
374 imageData <- rbind(imageData, c(imgName, imgAddr)) | 533 imageData <- rbind(imageData, c(imgName, imgAddr)) |
375 invisible(dev.off()) | 534 invisible(dev.off()) |
376 | 535 |
377 pdf(mdsOutPdf) | 536 pdf(nmdsOutPdf) |
378 plotMDS(vData, labels=labels, cex=0.5) | 537 plotMDS(plotData, labels=labels, cex=0.5) |
379 linkName <- paste0("MDS Plot (.pdf)") | 538 linkName <- paste0("MDS Plot (normalised).pdf") |
380 linkAddr <- paste0("mdsplot.pdf") | 539 linkAddr <- paste0("mdsplot.pdf") |
381 linkData <- rbind(linkData, c(linkName, linkAddr)) | 540 linkData <- rbind(linkData, c(linkName, linkAddr)) |
382 invisible(dev.off()) | 541 invisible(dev.off()) |
383 | 542 |
384 | 543 |
544 print("Generating DE results") | |
545 status = decideTests(fit, adjust.method=opt$pAdjOpt, p.value=opt$pValReq, | |
546 lfc=opt$lfcReq) | |
547 sumStatus <- summary(status) | |
548 | |
385 for (i in 1:length(contrastData)) { | 549 for (i in 1:length(contrastData)) { |
386 | 550 # Collect counts for differential expression |
387 status = decideTests(voomFit[, i], adjust.method=pAdjOpt, p.value=pValReq, | 551 upCount[i] <- sumStatus["Up", i] |
388 lfc=lfcReq) | 552 downCount[i] <- sumStatus["Down", i] |
389 | 553 flatCount[i] <- sumStatus["NotSig", i] |
390 sumStatus <- summary(status) | 554 |
391 | 555 # Write top expressions table |
392 # Collect counts for differential expression | 556 top <- topTable(fit, coef=i, number=Inf, sort.by="P") |
393 upCount[i] <- sumStatus["1",] | 557 if (wantTrend) { |
394 downCount[i] <- sumStatus["-1",] | 558 write.table(top, file=topOut[i], row.names=TRUE, sep="\t") |
395 flatCount[i] <- sumStatus["0",] | 559 } else { |
396 | 560 write.table(top, file=topOut[i], row.names=FALSE, sep="\t") |
397 # Write top expressions table | 561 } |
398 top <- topTable(voomFit, coef=i, number=Inf, sort.by="P") | 562 |
399 write.table(top, file=topOut[i], row.names=FALSE, sep="\t") | 563 linkName <- paste0(deMethod, "_", contrastData[i], ".tsv") |
400 | 564 linkAddr <- paste0(deMethod, "_", contrastData[i], ".tsv") |
401 linkName <- paste0("limma-voom_", contrastData[i], ".tsv") | 565 linkData <- rbind(linkData, c(linkName, linkAddr)) |
402 linkAddr <- paste0("limma-voom_", contrastData[i], ".tsv") | 566 |
403 linkData <- rbind(linkData, c(linkName, linkAddr)) | 567 # Plot MA (log ratios vs mean average) using limma package on weighted |
404 | 568 pdf(maOutPdf[i]) |
405 # Plot MA (log ratios vs mean average) using limma package on weighted | 569 limma::plotMD(fit, status=status, coef=i, |
406 pdf(maOutPdf[i]) | 570 main=paste("MA Plot:", unmake.names(contrastData[i])), |
407 limma::plotMA(voomFit, status=status, coef=i, | 571 col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"), |
408 main=paste("MA Plot:", unmake.names(contrastData[i])), | 572 xlab="Average Expression", ylab="logFC") |
409 col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"), | 573 |
410 xlab="Average Expression", ylab="logFC") | 574 abline(h=0, col="grey", lty=2) |
411 | 575 |
412 abline(h=0, col="grey", lty=2) | 576 linkName <- paste0("MA Plot_", contrastData[i], " (.pdf)") |
413 | 577 linkAddr <- paste0("maplot_", contrastData[i], ".pdf") |
414 linkName <- paste0("MA Plot_", contrastData[i], " (.pdf)") | 578 linkData <- rbind(linkData, c(linkName, linkAddr)) |
415 linkAddr <- paste0("maplot_", contrastData[i], ".pdf") | 579 invisible(dev.off()) |
416 linkData <- rbind(linkData, c(linkName, linkAddr)) | 580 |
417 invisible(dev.off()) | 581 png(maOutPng[i], height=600, width=600) |
418 | 582 limma::plotMD(fit, status=status, coef=i, |
419 png(maOutPng[i], height=600, width=600) | 583 main=paste("MA Plot:", unmake.names(contrastData[i])), |
420 limma::plotMA(voomFit, status=status, coef=i, | 584 col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"), |
421 main=paste("MA Plot:", unmake.names(contrastData[i])), | 585 xlab="Average Expression", ylab="logFC") |
422 col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"), | 586 |
423 xlab="Average Expression", ylab="logFC") | 587 abline(h=0, col="grey", lty=2) |
424 | 588 |
425 abline(h=0, col="grey", lty=2) | 589 imgName <- paste0("MA Plot_", contrastData[i]) |
426 | 590 imgAddr <- paste0("maplot_", contrastData[i], ".png") |
427 imgName <- paste0("MA Plot_", contrastData[i]) | 591 imageData <- rbind(imageData, c(imgName, imgAddr)) |
428 imgAddr <- paste0("maplot_", contrastData[i], ".png") | 592 invisible(dev.off()) |
429 imageData <- rbind(imageData, c(imgName, imgAddr)) | |
430 invisible(dev.off()) | |
431 } | 593 } |
432 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount) | 594 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount) |
433 row.names(sigDiff) <- contrastData | 595 row.names(sigDiff) <- contrastData |
434 | 596 |
435 # Save relevant items as rda object | 597 # Save relevant items as rda object |
436 if (wantRda) { | 598 if (wantRda) { |
437 if (wantWeight) { | 599 print("Saving RData") |
438 save(data, status, vData, labels, factors, wts, voomFit, top, contrasts, | 600 if (wantWeight) { |
439 design, | 601 save(data, status, plotData, labels, factors, wts, fit, top, contrasts, |
440 file=rdaOut, ascii=TRUE) | 602 design, |
441 } else { | 603 file=rdaOut, ascii=TRUE) |
442 save(data, status, vData, labels, factors, voomFit, top, contrasts, design, | 604 } else { |
443 file=rdaOut, ascii=TRUE) | 605 save(data, status, plotData, labels, factors, fit, top, contrasts, design, |
444 } | 606 file=rdaOut, ascii=TRUE) |
445 linkData <- rbind(linkData, c("RData (.rda)", "RData.rda")) | 607 } |
608 linkData <- rbind(linkData, c((paste0(deMethod, "_analysis.RData")), (paste0(deMethod, "_analysis.RData")))) | |
446 } | 609 } |
447 | 610 |
448 # Record session info | 611 # Record session info |
449 writeLines(capture.output(sessionInfo()), sessionOut) | 612 writeLines(capture.output(sessionInfo()), sessionOut) |
450 linkData <- rbind(linkData, c("Session Info", "session_info.txt")) | 613 linkData <- rbind(linkData, c("Session Info", "session_info.txt")) |
456 ################################################################################ | 619 ################################################################################ |
457 ### HTML Generation | 620 ### HTML Generation |
458 ################################################################################ | 621 ################################################################################ |
459 | 622 |
460 # Clear file | 623 # Clear file |
461 cat("", file=htmlPath) | 624 cat("", file=opt$htmlPath) |
462 | 625 |
463 cata("<html>\n") | 626 cata("<html>\n") |
464 | 627 |
465 cata("<body>\n") | 628 cata("<body>\n") |
466 cata("<h3>Limma-voom Analysis Output:</h3>\n") | 629 cata("<h3>Limma Analysis Output:</h3>\n") |
467 cata("PDF copies of JPEGS available in 'Plots' section.<br />\n") | 630 cata("PDF copies of JPEGS available in 'Plots' section.<br />\n") |
468 if (wantWeight) { | 631 if (wantWeight) { |
469 HtmlImage(imageData$Link[1], imageData$Label[1], width=1000) | 632 HtmlImage(imageData$Link[1], imageData$Label[1], width=1000) |
470 } else { | 633 } else { |
471 HtmlImage(imageData$Link[1], imageData$Label[1]) | 634 HtmlImage(imageData$Link[1], imageData$Label[1]) |
472 } | 635 } |
473 | 636 |
474 for (i in 2:nrow(imageData)) { | 637 for (i in 2:nrow(imageData)) { |
475 HtmlImage(imageData$Link[i], imageData$Label[i]) | 638 HtmlImage(imageData$Link[i], imageData$Label[i]) |
476 } | 639 } |
477 | 640 |
478 cata("<h4>Differential Expression Counts:</h4>\n") | 641 cata("<h4>Differential Expression Counts:</h4>\n") |
479 | 642 |
480 cata("<table border=\"1\" cellpadding=\"4\">\n") | 643 cata("<table border=\"1\" cellpadding=\"4\">\n") |
481 cata("<tr>\n") | 644 cata("<tr>\n") |
482 TableItem() | 645 TableItem() |
483 for (i in colnames(sigDiff)) { | 646 for (i in colnames(sigDiff)) { |
484 TableHeadItem(i) | 647 TableHeadItem(i) |
485 } | 648 } |
486 cata("</tr>\n") | 649 cata("</tr>\n") |
487 for (i in 1:nrow(sigDiff)) { | 650 for (i in 1:nrow(sigDiff)) { |
488 cata("<tr>\n") | 651 cata("<tr>\n") |
489 TableHeadItem(unmake.names(row.names(sigDiff)[i])) | 652 TableHeadItem(unmake.names(row.names(sigDiff)[i])) |
490 for (j in 1:ncol(sigDiff)) { | 653 for (j in 1:ncol(sigDiff)) { |
491 TableItem(as.character(sigDiff[i, j])) | 654 TableItem(as.character(sigDiff[i, j])) |
492 } | 655 } |
493 cata("</tr>\n") | 656 cata("</tr>\n") |
494 } | 657 } |
495 cata("</table>") | 658 cata("</table>") |
496 | 659 |
497 cata("<h4>Plots:</h4>\n") | 660 cata("<h4>Plots:</h4>\n") |
498 for (i in 1:nrow(linkData)) { | 661 for (i in 1:nrow(linkData)) { |
499 if (grepl(".pdf", linkData$Link[i])) { | 662 if (grepl(".pdf", linkData$Link[i])) { |
500 HtmlLink(linkData$Link[i], linkData$Label[i]) | 663 HtmlLink(linkData$Link[i], linkData$Label[i]) |
501 } | 664 } |
502 } | 665 } |
503 | 666 |
504 cata("<h4>Tables:</h4>\n") | 667 cata("<h4>Tables:</h4>\n") |
505 for (i in 1:nrow(linkData)) { | 668 for (i in 1:nrow(linkData)) { |
506 if (grepl(".tsv", linkData$Link[i])) { | 669 if (grepl(".tsv", linkData$Link[i])) { |
507 HtmlLink(linkData$Link[i], linkData$Label[i]) | 670 HtmlLink(linkData$Link[i], linkData$Label[i]) |
508 } | 671 } |
509 } | 672 } |
510 | 673 |
511 if (wantRda) { | 674 if (wantRda) { |
512 cata("<h4>R Data Object:</h4>\n") | 675 cata("<h4>R Data Object:</h4>\n") |
513 for (i in 1:nrow(linkData)) { | 676 for (i in 1:nrow(linkData)) { |
514 if (grepl(".rda", linkData$Link[i])) { | 677 if (grepl(".RData", linkData$Link[i])) { |
515 HtmlLink(linkData$Link[i], linkData$Label[i]) | 678 HtmlLink(linkData$Link[i], linkData$Label[i]) |
516 } | 679 } |
517 } | 680 } |
518 } | 681 } |
519 | 682 |
520 cata("<p>Alt-click links to download file.</p>\n") | 683 cata("<p>Alt-click links to download file.</p>\n") |
521 cata("<p>Click floppy disc icon associated history item to download ") | 684 cata("<p>Click floppy disc icon associated history item to download ") |
522 cata("all files.</p>\n") | 685 cata("all files.</p>\n") |
523 cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n") | 686 cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n") |
524 | 687 |
525 cata("<h4>Additional Information</h4>\n") | 688 cata("<h4>Additional Information</h4>\n") |
526 cata("<ul>\n") | 689 cata("<ul>\n") |
527 if (cpmReq!=0 && sampleReq!=0) { | 690 |
528 tempStr <- paste("Genes without more than", cpmReq, | 691 if (filtCPM || filtSmpCount || filtTotCount) { |
529 "CPM in at least", sampleReq, "samples are insignificant", | 692 if (filtCPM) { |
530 "and filtered out.") | 693 tempStr <- paste("Genes without more than", opt$cmpReq, |
531 ListItem(tempStr) | 694 "CPM in at least", opt$sampleReq, "samples are insignificant", |
532 filterProp <- round(filteredCount/preFilterCount*100, digits=2) | 695 "and filtered out.") |
533 tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp, | 696 } else if (filtSmpCount) { |
534 "%) genes were filtered out for low expression.") | 697 tempStr <- paste("Genes without more than", opt$cntReq, |
535 ListItem(tempStr) | 698 "counts in at least", opt$sampleReq, "samples are insignificant", |
536 } | 699 "and filtered out.") |
537 ListItem(normOpt, " was the method used to normalise library sizes.") | 700 } else if (filtTotCount) { |
701 tempStr <- paste("Genes without more than", opt$cntReq, | |
702 "counts, after summing counts for all samples, are insignificant", | |
703 "and filtered out.") | |
704 } | |
705 | |
706 ListItem(tempStr) | |
707 filterProp <- round(filteredCount/preFilterCount*100, digits=2) | |
708 tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp, | |
709 "%) genes were filtered out for low expression.") | |
710 ListItem(tempStr) | |
711 } | |
712 ListItem(opt$normOpt, " was the method used to normalise library sizes.") | |
713 if (wantTrend) { | |
714 ListItem("The limma-trend method was used.") | |
715 } else { | |
716 ListItem("The limma-voom method was used.") | |
717 } | |
538 if (wantWeight) { | 718 if (wantWeight) { |
539 ListItem("Weights were applied to samples.") | 719 ListItem("Weights were applied to samples.") |
540 } else { | 720 } else { |
541 ListItem("Weights were not applied to samples.") | 721 ListItem("Weights were not applied to samples.") |
542 } | 722 } |
543 if (pAdjOpt!="none") { | 723 if (wantRobust) { |
544 if (pAdjOpt=="BH" || pAdjOpt=="BY") { | 724 ListItem("eBayes was used with robust settings (robust=TRUE).") |
545 tempStr <- paste0("MA-Plot highlighted genes are significant at FDR ", | 725 } |
546 "of ", pValReq," and exhibit log2-fold-change of at ", | 726 if (opt$pAdjOpt!="none") { |
547 "least ", lfcReq, ".") | 727 if (opt$pAdjOpt=="BH" || opt$pAdjOpt=="BY") { |
548 ListItem(tempStr) | 728 tempStr <- paste0("MA-Plot highlighted genes are significant at FDR ", |
549 } else if (pAdjOpt=="holm") { | 729 "of ", opt$pValReq," and exhibit log2-fold-change of at ", |
550 tempStr <- paste0("MA-Plot highlighted genes are significant at adjusted ", | 730 "least ", opt$lfcReq, ".") |
551 "p-value of ", pValReq," by the Holm(1979) ", | 731 ListItem(tempStr) |
552 "method, and exhibit log2-fold-change of at least ", | 732 } else if (opt$pAdjOpt=="holm") { |
553 lfcReq, ".") | 733 tempStr <- paste0("MA-Plot highlighted genes are significant at adjusted ", |
554 ListItem(tempStr) | 734 "p-value of ", opt$pValReq," by the Holm(1979) ", |
555 } | 735 "method, and exhibit log2-fold-change of at least ", |
556 } else { | 736 opt$lfcReq, ".") |
557 tempStr <- paste0("MA-Plot highlighted genes are significant at p-value ", | 737 ListItem(tempStr) |
558 "of ", pValReq," and exhibit log2-fold-change of at ", | 738 } |
559 "least ", lfcReq, ".") | 739 } else { |
560 ListItem(tempStr) | 740 tempStr <- paste0("MA-Plot highlighted genes are significant at p-value ", |
741 "of ", opt$pValReq," and exhibit log2-fold-change of at ", | |
742 "least ", opt$lfcReq, ".") | |
743 ListItem(tempStr) | |
561 } | 744 } |
562 cata("</ul>\n") | 745 cata("</ul>\n") |
563 | 746 |
564 cata("<h4>Summary of experimental data:</h4>\n") | 747 cata("<h4>Summary of experimental data:</h4>\n") |
565 | 748 |
568 cata("<table border=\"1\" cellpadding=\"3\">\n") | 751 cata("<table border=\"1\" cellpadding=\"3\">\n") |
569 cata("<tr>\n") | 752 cata("<tr>\n") |
570 TableHeadItem("SampleID") | 753 TableHeadItem("SampleID") |
571 TableHeadItem(names(factors)[1]," (Primary Factor)") | 754 TableHeadItem(names(factors)[1]," (Primary Factor)") |
572 | 755 |
573 if (ncol(factors) > 1) { | 756 if (ncol(factors) > 1) { |
574 | |
575 for (i in names(factors)[2:length(names(factors))]) { | 757 for (i in names(factors)[2:length(names(factors))]) { |
576 TableHeadItem(i) | 758 TableHeadItem(i) |
577 } | 759 } |
578 cata("</tr>\n") | 760 cata("</tr>\n") |
579 } | 761 } |
580 | 762 |
581 for (i in 1:nrow(factors)) { | 763 for (i in 1:nrow(factors)) { |
582 cata("<tr>\n") | 764 cata("<tr>\n") |
583 TableHeadItem(row.names(factors)[i]) | 765 TableHeadItem(row.names(factors)[i]) |
584 for (j in 1:ncol(factors)) { | 766 for (j in 1:ncol(factors)) { |
585 TableItem(as.character(unmake.names(factors[i, j]))) | 767 TableItem(as.character(unmake.names(factors[i, j]))) |
586 } | 768 } |
587 cata("</tr>\n") | 769 cata("</tr>\n") |
588 } | 770 } |
589 cata("</table>") | 771 cata("</table>") |
590 | 772 |
636 "respect to biological variation. Nucleic Acids Research 40,", | 818 "respect to biological variation. Nucleic Acids Research 40,", |
637 "4288-4297") | 819 "4288-4297") |
638 cit[10] <- paste("Law CW, Chen Y, Shi W, and Smyth GK (2014). Voom:", | 820 cit[10] <- paste("Law CW, Chen Y, Shi W, and Smyth GK (2014). Voom:", |
639 "precision weights unlock linear model analysis tools for", | 821 "precision weights unlock linear model analysis tools for", |
640 "RNA-seq read counts. Genome Biology 15, R29.") | 822 "RNA-seq read counts. Genome Biology 15, R29.") |
641 cit[11] <- paste("Ritchie ME, Diyagama D, Neilson J, van Laar R,", | 823 cit[11] <- paste("Ritchie ME, Diyagama D, Neilson J, van Laar R,", |
642 "Dobrovic A, Holloway A and Smyth GK (2006).", | 824 "Dobrovic A, Holloway A and Smyth GK (2006).", |
643 "Empirical array quality weights for microarray data.", | 825 "Empirical array quality weights for microarray data.", |
644 "BMC Bioinformatics 7, Article 261.") | 826 "BMC Bioinformatics 7, Article 261.") |
645 cata("<h3>Citations</h3>\n") | 827 cata("<h3>Citations</h3>\n") |
646 cata(cit[1], "\n") | 828 cata(cit[1], "\n") |
665 cata("</ol>\n") | 847 cata("</ol>\n") |
666 | 848 |
667 cata("<p>Please report problems or suggestions to: su.s@wehi.edu.au</p>\n") | 849 cata("<p>Please report problems or suggestions to: su.s@wehi.edu.au</p>\n") |
668 | 850 |
669 for (i in 1:nrow(linkData)) { | 851 for (i in 1:nrow(linkData)) { |
670 if (grepl("session_info", linkData$Link[i])) { | 852 if (grepl("session_info", linkData$Link[i])) { |
671 HtmlLink(linkData$Link[i], linkData$Label[i]) | 853 HtmlLink(linkData$Link[i], linkData$Label[i]) |
672 } | 854 } |
673 } | 855 } |
674 | 856 |
675 cata("<table border=\"0\">\n") | 857 cata("<table border=\"0\">\n") |
676 cata("<tr>\n") | 858 cata("<tr>\n") |
677 TableItem("Task started at:"); TableItem(timeStart) | 859 TableItem("Task started at:"); TableItem(timeStart) |