Mercurial > repos > shians > shrnaseq
annotate hairpinTool.R @ 11:c0a76e30d61b
- Removed max on cpm filtering. Really shouldn't have been there in the first
place.
- Added explicit arguments to processHairpinReads in R.
author | shian_su <registertonysu@gmail.com> |
---|---|
date | Tue, 30 Sep 2014 16:36:12 +1000 |
parents | 8923d4ea858b |
children | ebb4cb1e8e35 |
rev | line source |
---|---|
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
1 # ARGS: 1.inputType -String specifying format of input (fastq or table) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
2 # IF inputType is "fastQ": |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
3 # 2*.fastqPath -One or more strings specifying path to fastq files |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
4 # 2.annoPath -String specifying path to hairpin annotation table |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
5 # 3.samplePath -String specifying path to sample annotation table |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
6 # 4.barStart -Integer specifying starting position of barcode |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
7 # 5.barEnd -Integer specifying ending position of barcode |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
8 # 6.hpStart -Integer specifying startins position of hairpin |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
9 # unique region |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
10 # 7.hpEnd -Integer specifying ending position of hairpin |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
11 # unique region |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
12 # ### |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
13 # IF inputType is "counts": |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
14 # 2.countPath -String specifying path to count table |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
15 # 3.annoPath -String specifying path to hairpin annotation table |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
16 # 4.samplePath -String specifying path to sample annotation table |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
17 # ### |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
18 # 8.cpmReq -Float specifying cpm requirement |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
19 # 9.sampleReq -Integer specifying cpm requirement |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
20 # 10.fdrThresh -Float specifying the FDR requirement |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
21 # 11.lfcThresh -Float specifying the log-fold-change requirement |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
22 # 12.workMode -String specifying exact test or GLM usage |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
23 # 13.htmlPath -String specifying path to HTML file |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
24 # 14.folderPath -STring specifying path to folder for output |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
25 # IF workMode is "classic" (exact test) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
26 # 15.pairData[2] -String specifying first group for exact test |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
27 # 16.pairData[1] -String specifying second group for exact test |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
28 # ### |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
29 # IF workMode is "glm" |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
30 # 15.contrastData -String specifying contrasts to be made |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
31 # 16.roastOpt -String specifying usage of gene-wise tests |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
32 # 17.hairpinReq -String specifying hairpin requirement for gene- |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
33 # wise test |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
34 # 18.selectOpt -String specifying type of selection for barcode |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
35 # plots |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
36 # 19.selectVals -String specifying members selected for barcode |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
37 # plots |
9
f1076bfb0ed1
- Fixed tool to actually make use of barcode location options
shian_su <registertonysu@gmail.com>
parents:
8
diff
changeset
|
38 # ### |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
39 # |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
40 # OUT: Bar Plot of Counts Per Index |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
41 # Bar Plot of Counts Per Hairpin |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
42 # MDS Plot |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
43 # Smear Plot |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
44 # Barcode Plots (If Genewise testing was selected) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
45 # Top Expression Table |
7 | 46 # Feature Counts Table |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
47 # HTML file linking to the ouputs |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
48 # |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
49 # Author: Shian Su - registertonysu@gmail.com - Jan 2014 |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
50 |
2 | 51 # Record starting time |
52 timeStart <- as.character(Sys.time()) | |
53 | |
54 # Loading and checking required packages | |
55 library(methods, quietly=TRUE, warn.conflicts=FALSE) | |
56 library(statmod, quietly=TRUE, warn.conflicts=FALSE) | |
57 library(splines, quietly=TRUE, warn.conflicts=FALSE) | |
58 library(edgeR, quietly=TRUE, warn.conflicts=FALSE) | |
59 library(limma, quietly=TRUE, warn.conflicts=FALSE) | |
60 | |
9
f1076bfb0ed1
- Fixed tool to actually make use of barcode location options
shian_su <registertonysu@gmail.com>
parents:
8
diff
changeset
|
61 if (packageVersion("edgeR") < "3.5.27") { |
8 | 62 stop("Please update 'edgeR' to version >= 3.5.23 to run this tool") |
63 } | |
64 | |
65 if (packageVersion("limma")<"3.19.19") { | |
66 message("Update 'limma' to version >= 3.19.19 to see updated barcode graphs") | |
2 | 67 } |
68 | |
69 ################################################################################ | |
70 ### Function declarations | |
71 ################################################################################ | |
72 | |
7 | 73 # Function to load libaries without messages |
74 silentLibrary <- function(...) { | |
75 list <- c(...) | |
76 for (package in list){ | |
77 suppressPackageStartupMessages(library(package, character.only=TRUE)) | |
78 } | |
79 } | |
80 | |
2 | 81 # Function to sanitise contrast equations so there are no whitespaces |
82 # surrounding the arithmetic operators, leading or trailing whitespace | |
83 sanitiseEquation <- function(equation) { | |
84 equation <- gsub(" *[+] *", "+", equation) | |
85 equation <- gsub(" *[-] *", "-", equation) | |
86 equation <- gsub(" *[/] *", "/", equation) | |
87 equation <- gsub(" *[*] *", "*", equation) | |
88 equation <- gsub("^\\s+|\\s+$", "", equation) | |
89 return(equation) | |
90 } | |
91 | |
92 # Function to sanitise group information | |
93 sanitiseGroups <- function(string) { | |
94 string <- gsub(" *[,] *", ",", string) | |
95 string <- gsub("^\\s+|\\s+$", "", string) | |
96 return(string) | |
97 } | |
98 | |
99 # Function to change periods to whitespace in a string | |
100 unmake.names <- function(string) { | |
101 string <- gsub(".", " ", string, fixed=TRUE) | |
102 return(string) | |
103 } | |
104 | |
105 # Function has string input and generates an output path string | |
106 makeOut <- function(filename) { | |
107 return(paste0(folderPath, "/", filename)) | |
108 } | |
109 | |
110 # Function has string input and generates both a pdf and png output strings | |
111 imgOut <- function(filename) { | |
112 assign(paste0(filename, "Png"), makeOut(paste0(filename,".png")), | |
7 | 113 envir=.GlobalEnv) |
2 | 114 assign(paste0(filename, "Pdf"), makeOut(paste0(filename,".pdf")), |
7 | 115 envir=.GlobalEnv) |
2 | 116 } |
117 | |
118 # Create cat function default path set, default seperator empty and appending | |
119 # true by default (Ripped straight from the cat function with altered argument | |
120 # defaults) | |
7 | 121 cata <- function(..., file=htmlPath, sep="", fill=FALSE, labels=NULL, |
122 append=TRUE) { | |
2 | 123 if (is.character(file)) |
124 if (file == "") | |
125 file <- stdout() | |
126 else if (substring(file, 1L, 1L) == "|") { | |
127 file <- pipe(substring(file, 2L), "w") | |
128 on.exit(close(file)) | |
129 } | |
130 else { | |
131 file <- file(file, ifelse(append, "a", "w")) | |
132 on.exit(close(file)) | |
133 } | |
134 .Internal(cat(list(...), file, sep, fill, labels, append)) | |
135 } | |
136 | |
137 # Function to write code for html head and title | |
138 HtmlHead <- function(title) { | |
139 cata("<head>\n") | |
140 cata("<title>", title, "</title>\n") | |
141 cata("</head>\n") | |
142 } | |
143 | |
144 # Function to write code for html links | |
145 HtmlLink <- function(address, label=address) { | |
146 cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n") | |
147 } | |
148 | |
149 # Function to write code for html images | |
150 HtmlImage <- function(source, label=source, height=600, width=600) { | |
151 cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height) | |
152 cata("\" width=\"", width, "\"/>\n") | |
153 } | |
154 | |
155 # Function to write code for html list items | |
156 ListItem <- function(...) { | |
157 cata("<li>", ..., "</li>\n") | |
158 } | |
159 | |
160 TableItem <- function(...) { | |
161 cata("<td>", ..., "</td>\n") | |
162 } | |
163 | |
164 TableHeadItem <- function(...) { | |
165 cata("<th>", ..., "</th>\n") | |
166 } | |
167 ################################################################################ | |
168 ### Input Processing | |
169 ################################################################################ | |
170 | |
171 # Grabbing arguments from command line | |
172 argv <- commandArgs(TRUE) | |
173 | |
174 # Remove fastq file paths after collecting from argument vector | |
175 inputType <- as.character(argv[1]) | |
176 if (inputType=="fastq") { | |
177 fastqPath <- as.character(gsub("fastq::", "", argv[grepl("fastq::", argv)], | |
178 fixed=TRUE)) | |
179 argv <- argv[!grepl("fastq::", argv, fixed=TRUE)] | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
180 annoPath <- as.character(argv[2]) |
2 | 181 samplePath <- as.character(argv[3]) |
182 barStart <- as.numeric(argv[4]) | |
183 barEnd <- as.numeric(argv[5]) | |
184 hpStart <- as.numeric(argv[6]) | |
185 hpEnd <- as.numeric(argv[7]) | |
186 } else if (inputType=="counts") { | |
187 countPath <- as.character(argv[2]) | |
188 annoPath <- as.character(argv[3]) | |
189 samplePath <- as.character(argv[4]) | |
190 } | |
191 | |
192 cpmReq <- as.numeric(argv[8]) | |
193 sampleReq <- as.numeric(argv[9]) | |
194 fdrThresh <- as.numeric(argv[10]) | |
195 lfcThresh <- as.numeric(argv[11]) | |
196 workMode <- as.character(argv[12]) | |
197 htmlPath <- as.character(argv[13]) | |
198 folderPath <- as.character(argv[14]) | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
199 |
2 | 200 if (workMode=="classic") { |
201 pairData <- character() | |
202 pairData[2] <- as.character(argv[15]) | |
203 pairData[1] <- as.character(argv[16]) | |
204 } else if (workMode=="glm") { | |
205 contrastData <- as.character(argv[15]) | |
206 roastOpt <- as.character(argv[16]) | |
207 hairpinReq <- as.numeric(argv[17]) | |
208 selectOpt <- as.character(argv[18]) | |
209 selectVals <- as.character(argv[19]) | |
210 } | |
211 | |
212 # Read in inputs | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
213 |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
214 samples <- read.table(samplePath, header=TRUE, sep="\t") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
215 anno <- read.table(annoPath, header=TRUE, sep="\t") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
216 if (inputType=="counts") { |
2 | 217 counts <- read.table(countPath, header=TRUE, sep="\t") |
218 } | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
219 |
2 | 220 ###################### Check inputs for correctness ############################ |
221 samples$ID <- make.names(samples$ID) | |
222 | |
223 if (!any(grepl("group", names(samples)))) { | |
224 stop("'group' column not specified in sample annotation file") | |
225 } # Check if grouping variable has been specified | |
226 | |
227 if (any(table(samples$ID)>1)){ | |
228 tab <- table(samples$ID) | |
229 offenders <- paste(names(tab[tab>1]), collapse=", ") | |
230 offenders <- unmake.names(offenders) | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
231 stop("'ID' column of sample annotation must have unique values, values ", |
2 | 232 offenders, " are repeated") |
233 } # Check that IDs in sample annotation are unique | |
234 | |
235 if (inputType=="fastq") { | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
236 |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
237 if (any(table(anno$ID)>1)){ |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
238 tab <- table(anno$ID) |
2 | 239 offenders <- paste(names(tab[tab>1]), collapse=", ") |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
240 stop("'ID' column of hairpin annotation must have unique values, values ", |
2 | 241 offenders, " are repeated") |
242 } # Check that IDs in hairpin annotation are unique | |
4
f8af57d6f60b
Fixed bug causing fastq input to break
shian_su <registertonysu@gmail.com>
parents:
2
diff
changeset
|
243 |
2 | 244 } else if (inputType=="counts") { |
4
f8af57d6f60b
Fixed bug causing fastq input to break
shian_su <registertonysu@gmail.com>
parents:
2
diff
changeset
|
245 if (any(is.na(match(samples$ID, colnames(counts))))) { |
f8af57d6f60b
Fixed bug causing fastq input to break
shian_su <registertonysu@gmail.com>
parents:
2
diff
changeset
|
246 stop("not all samples have groups specified") |
f8af57d6f60b
Fixed bug causing fastq input to break
shian_su <registertonysu@gmail.com>
parents:
2
diff
changeset
|
247 } # Check that a group has be specifed for each sample |
2 | 248 |
249 if (any(table(counts$ID)>1)){ | |
250 tab <- table(counts$ID) | |
251 offenders <- paste(names(tab[tab>1]), collapse=", ") | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
252 stop("'ID' column of count table must have unique values, values ", |
2 | 253 offenders, " are repeated") |
254 } # Check that IDs in count table are unique | |
255 } | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
256 if (workMode=="glm") { |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
257 if (roastOpt == "yes") { |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
258 if (is.na(match("Gene", colnames(anno)))) { |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
259 tempStr <- paste("Gene-wise tests selected but'Gene' column not", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
260 "specified in hairpin annotation file") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
261 stop(tempStr) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
262 } |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
263 } |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
264 } |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
265 |
2 | 266 ################################################################################ |
267 | |
268 # Process arguments | |
269 if (workMode=="glm") { | |
270 if (roastOpt=="yes") { | |
271 wantRoast <- TRUE | |
272 } else { | |
273 wantRoast <- FALSE | |
274 } | |
275 } | |
276 | |
277 # Split up contrasts seperated by comma into a vector and replace spaces with | |
278 # periods | |
279 if (exists("contrastData")) { | |
280 contrastData <- unlist(strsplit(contrastData, split=",")) | |
281 contrastData <- sanitiseEquation(contrastData) | |
282 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE) | |
283 } | |
284 | |
285 # Replace spaces with periods in pair data | |
286 if (exists("pairData")) { | |
287 pairData <- make.names(pairData) | |
288 } | |
289 | |
290 # Generate output folder and paths | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
291 dir.create(folderPath, showWarnings=FALSE) |
2 | 292 |
293 # Generate links for outputs | |
294 imgOut("barHairpin") | |
295 imgOut("barIndex") | |
296 imgOut("mds") | |
297 imgOut("bcv") | |
298 if (workMode == "classic") { | |
299 smearPng <- makeOut(paste0("smear(", pairData[2], "-", pairData[1],").png")) | |
300 smearPdf <- makeOut(paste0("smear(", pairData[2], "-", pairData[1],").pdf")) | |
301 topOut <- makeOut(paste0("toptag(", pairData[2], "-", pairData[1],").tsv")) | |
302 } else if (workMode=="glm") { | |
303 smearPng <- character() | |
304 smearPdf <- character() | |
305 topOut <- character() | |
306 roastOut <- character() | |
307 barcodePng <- character() | |
308 barcodePdf <- character() | |
309 for (i in 1:length(contrastData)) { | |
310 smearPng[i] <- makeOut(paste0("smear(", contrastData[i], ").png")) | |
311 smearPdf[i] <- makeOut(paste0("smear(", contrastData[i], ").pdf")) | |
312 topOut[i] <- makeOut(paste0("toptag(", contrastData[i], ").tsv")) | |
8 | 313 roastOut[i] <- makeOut(paste0("gene_level(", contrastData[i], ").tsv")) |
2 | 314 barcodePng[i] <- makeOut(paste0("barcode(", contrastData[i], ").png")) |
315 barcodePdf[i] <- makeOut(paste0("barcode(", contrastData[i], ").pdf")) | |
316 } | |
317 } | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
318 countsOut <- makeOut("counts.tsv") |
8 | 319 sessionOut <- makeOut("session_info.txt") |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
320 |
2 | 321 # Initialise data for html links and images, table with the link label and |
322 # link address | |
323 linkData <- data.frame(Label=character(), Link=character(), | |
324 stringsAsFactors=FALSE) | |
325 imageData <- data.frame(Label=character(), Link=character(), | |
326 stringsAsFactors=FALSE) | |
7 | 327 |
328 # Initialise vectors for storage of up/down/neutral regulated counts | |
329 upCount <- numeric() | |
330 downCount <- numeric() | |
331 flatCount <- numeric() | |
332 | |
2 | 333 ################################################################################ |
334 ### Data Processing | |
335 ################################################################################ | |
336 | |
337 # Transform gene selection from string into index values for mroast | |
338 if (workMode=="glm") { | |
339 if (selectOpt=="rank") { | |
340 selectVals <- gsub(" ", "", selectVals, fixed=TRUE) | |
341 selectVals <- unlist(strsplit(selectVals, ",")) | |
342 | |
343 for (i in 1:length(selectVals)) { | |
344 if (grepl(":", selectVals[i], fixed=TRUE)) { | |
345 temp <- unlist(strsplit(selectVals[i], ":")) | |
346 selectVals <- selectVals[-i] | |
347 a <- as.numeric(temp[1]) | |
348 b <- as.numeric(temp[2]) | |
349 selectVals <- c(selectVals, a:b) | |
350 } | |
351 } | |
352 selectVals <- as.numeric(unique(selectVals)) | |
353 } else { | |
354 selectVals <- gsub(" ", "", selectVals, fixed=TRUE) | |
8 | 355 selectVals <- unlist(strsplit(selectVals, ",")) |
2 | 356 } |
357 } | |
358 | |
359 if (inputType=="fastq") { | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
360 # Use EdgeR hairpin process and capture outputs |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
361 hpReadout <- capture.output( |
11
c0a76e30d61b
- Removed max on cpm filtering. Really shouldn't have been there in the first
shian_su <registertonysu@gmail.com>
parents:
10
diff
changeset
|
362 data <- processHairpinReads(readfile=fastqPath, |
c0a76e30d61b
- Removed max on cpm filtering. Really shouldn't have been there in the first
shian_su <registertonysu@gmail.com>
parents:
10
diff
changeset
|
363 barcodefile=samplePath, |
c0a76e30d61b
- Removed max on cpm filtering. Really shouldn't have been there in the first
shian_su <registertonysu@gmail.com>
parents:
10
diff
changeset
|
364 hairpinfile=annoPath, |
9
f1076bfb0ed1
- Fixed tool to actually make use of barcode location options
shian_su <registertonysu@gmail.com>
parents:
8
diff
changeset
|
365 barcodeStart=barStart, barcodeEnd=barEnd, |
2 | 366 hairpinStart=hpStart, hairpinEnd=hpEnd, |
367 verbose=TRUE) | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
368 ) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
369 |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
370 # Remove function output entries that show processing data or is empty |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
371 hpReadout <- hpReadout[hpReadout!=""] |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
372 hpReadout <- hpReadout[!grepl("Processing", hpReadout)] |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
373 hpReadout <- hpReadout[!grepl("in file", hpReadout)] |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
374 hpReadout <- gsub(" -- ", "", hpReadout, fixed=TRUE) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
375 |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
376 # Make the names of groups syntactically valid (replace spaces with periods) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
377 data$samples$group <- make.names(data$samples$group) |
4
f8af57d6f60b
Fixed bug causing fastq input to break
shian_su <registertonysu@gmail.com>
parents:
2
diff
changeset
|
378 } else if (inputType=="counts") { |
2 | 379 # Process counts information, set ID column to be row names |
380 rownames(counts) <- counts$ID | |
381 counts <- counts[ , !(colnames(counts)=="ID")] | |
382 countsRows <- nrow(counts) | |
383 | |
384 # Process group information | |
385 factors <- samples$group[match(samples$ID, colnames(counts))] | |
386 annoRows <- nrow(anno) | |
387 anno <- anno[match(rownames(counts), anno$ID), ] | |
388 annoMatched <- sum(!is.na(anno$ID)) | |
389 | |
390 if (any(is.na(anno$ID))) { | |
391 warningStr <- paste("count table contained more hairpins than", | |
392 "specified in hairpin annotation file") | |
393 warning(warningStr) | |
394 } | |
395 | |
396 # Filter out rows with zero counts | |
397 sel <- rowSums(counts)!=0 | |
398 counts <- counts[sel, ] | |
399 anno <- anno[sel, ] | |
400 | |
401 # Create DGEList | |
402 data <- DGEList(counts=counts, lib.size=colSums(counts), | |
403 norm.factors=rep(1,ncol(counts)), genes=anno, group=factors) | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
404 |
2 | 405 # Make the names of groups syntactically valid (replace spaces with periods) |
406 data$samples$group <- make.names(data$samples$group) | |
407 } | |
408 | |
10
8923d4ea858b
- Added check for zero library size, will now filter out zero library size
shian_su <registertonysu@gmail.com>
parents:
9
diff
changeset
|
409 # Filter out any samples with zero counts |
8923d4ea858b
- Added check for zero library size, will now filter out zero library size
shian_su <registertonysu@gmail.com>
parents:
9
diff
changeset
|
410 if (any(data$samples$lib.size == 0)) { |
8923d4ea858b
- Added check for zero library size, will now filter out zero library size
shian_su <registertonysu@gmail.com>
parents:
9
diff
changeset
|
411 sampleSel <- data$samples$lib.size != 0 |
8923d4ea858b
- Added check for zero library size, will now filter out zero library size
shian_su <registertonysu@gmail.com>
parents:
9
diff
changeset
|
412 filteredSamples <- paste(data$samples$ID[!sampleSel], collapse=", ") |
8923d4ea858b
- Added check for zero library size, will now filter out zero library size
shian_su <registertonysu@gmail.com>
parents:
9
diff
changeset
|
413 data$counts <- data$counts[, sampleSel] |
8923d4ea858b
- Added check for zero library size, will now filter out zero library size
shian_su <registertonysu@gmail.com>
parents:
9
diff
changeset
|
414 data$samples <- data$samples[sampleSel, ] |
8923d4ea858b
- Added check for zero library size, will now filter out zero library size
shian_su <registertonysu@gmail.com>
parents:
9
diff
changeset
|
415 } |
8923d4ea858b
- Added check for zero library size, will now filter out zero library size
shian_su <registertonysu@gmail.com>
parents:
9
diff
changeset
|
416 |
2 | 417 # Filter hairpins with low counts |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
418 preFilterCount <- nrow(data) |
2 | 419 sel <- rowSums(cpm(data$counts) > cpmReq) >= sampleReq |
420 data <- data[sel, ] | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
421 postFilterCount <- nrow(data) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
422 filteredCount <- preFilterCount-postFilterCount |
2 | 423 |
424 # Estimate dispersions | |
425 data <- estimateDisp(data) | |
426 commonBCV <- sqrt(data$common.dispersion) | |
427 | |
428 ################################################################################ | |
429 ### Output Processing | |
430 ################################################################################ | |
431 | |
432 # Plot number of hairpins that could be matched per sample | |
433 png(barIndexPng, width=600, height=600) | |
434 barplot(height<-colSums(data$counts), las=2, main="Counts per index", | |
435 cex.names=1.0, cex.axis=0.8, ylim=c(0, max(height)*1.2)) | |
436 imageData[1, ] <- c("Counts per Index", "barIndex.png") | |
437 invisible(dev.off()) | |
438 | |
439 pdf(barIndexPdf) | |
440 barplot(height<-colSums(data$counts), las=2, main="Counts per index", | |
441 cex.names=1.0, cex.axis=0.8, ylim=c(0, max(height)*1.2)) | |
442 linkData[1, ] <- c("Counts per Index Barplot (.pdf)", "barIndex.pdf") | |
443 invisible(dev.off()) | |
444 | |
445 # Plot per hairpin totals across all samples | |
446 png(barHairpinPng, width=600, height=600) | |
447 if (nrow(data$counts)<50) { | |
448 barplot(height<-rowSums(data$counts), las=2, main="Counts per hairpin", | |
449 cex.names=0.8, cex.axis=0.8, ylim=c(0, max(height)*1.2)) | |
450 } else { | |
451 barplot(height<-rowSums(data$counts), las=2, main="Counts per hairpin", | |
452 cex.names=0.8, cex.axis=0.8, ylim=c(0, max(height)*1.2), | |
453 names.arg=FALSE) | |
454 } | |
455 imageData <- rbind(imageData, c("Counts per Hairpin", "barHairpin.png")) | |
456 invisible(dev.off()) | |
457 | |
458 pdf(barHairpinPdf) | |
459 if (nrow(data$counts)<50) { | |
460 barplot(height<-rowSums(data$counts), las=2, main="Counts per hairpin", | |
461 cex.names=0.8, cex.axis=0.8, ylim=c(0, max(height)*1.2)) | |
462 } else { | |
463 barplot(height<-rowSums(data$counts), las=2, main="Counts per hairpin", | |
464 cex.names=0.8, cex.axis=0.8, ylim=c(0, max(height)*1.2), | |
465 names.arg=FALSE) | |
466 } | |
467 newEntry <- c("Counts per Hairpin Barplot (.pdf)", "barHairpin.pdf") | |
468 linkData <- rbind(linkData, newEntry) | |
469 invisible(dev.off()) | |
470 | |
471 # Make an MDS plot to visualise relationships between replicate samples | |
472 png(mdsPng, width=600, height=600) | |
473 plotMDS(data, labels=data$samples$group, col=as.numeric(data$samples$group), | |
474 main="MDS Plot") | |
475 imageData <- rbind(imageData, c("MDS Plot", "mds.png")) | |
476 invisible(dev.off()) | |
477 | |
478 pdf(mdsPdf) | |
479 plotMDS(data, labels=data$samples$group, col=as.numeric(data$samples$group), | |
480 main="MDS Plot") | |
481 newEntry <- c("MDS Plot (.pdf)", "mds.pdf") | |
482 linkData <- rbind(linkData, newEntry) | |
483 invisible(dev.off()) | |
484 | |
9
f1076bfb0ed1
- Fixed tool to actually make use of barcode location options
shian_su <registertonysu@gmail.com>
parents:
8
diff
changeset
|
485 # BCV Plot |
f1076bfb0ed1
- Fixed tool to actually make use of barcode location options
shian_su <registertonysu@gmail.com>
parents:
8
diff
changeset
|
486 png(bcvPng, width=600, height=600) |
f1076bfb0ed1
- Fixed tool to actually make use of barcode location options
shian_su <registertonysu@gmail.com>
parents:
8
diff
changeset
|
487 plotBCV(data, main="BCV Plot") |
f1076bfb0ed1
- Fixed tool to actually make use of barcode location options
shian_su <registertonysu@gmail.com>
parents:
8
diff
changeset
|
488 imageData <- rbind(imageData, c("BCV Plot", "bcv.png")) |
f1076bfb0ed1
- Fixed tool to actually make use of barcode location options
shian_su <registertonysu@gmail.com>
parents:
8
diff
changeset
|
489 invisible(dev.off()) |
f1076bfb0ed1
- Fixed tool to actually make use of barcode location options
shian_su <registertonysu@gmail.com>
parents:
8
diff
changeset
|
490 |
f1076bfb0ed1
- Fixed tool to actually make use of barcode location options
shian_su <registertonysu@gmail.com>
parents:
8
diff
changeset
|
491 pdf(bcvPdf) |
f1076bfb0ed1
- Fixed tool to actually make use of barcode location options
shian_su <registertonysu@gmail.com>
parents:
8
diff
changeset
|
492 plotBCV(data, main="BCV Plot") |
f1076bfb0ed1
- Fixed tool to actually make use of barcode location options
shian_su <registertonysu@gmail.com>
parents:
8
diff
changeset
|
493 newEntry <- c("BCV Plot (.pdf)", "bcv.pdf") |
f1076bfb0ed1
- Fixed tool to actually make use of barcode location options
shian_su <registertonysu@gmail.com>
parents:
8
diff
changeset
|
494 invisible(dev.off()) |
f1076bfb0ed1
- Fixed tool to actually make use of barcode location options
shian_su <registertonysu@gmail.com>
parents:
8
diff
changeset
|
495 |
2 | 496 if (workMode=="classic") { |
497 # Assess differential representation using classic exact testing methodology | |
498 # in edgeR | |
499 testData <- exactTest(data, pair=pairData) | |
500 | |
501 top <- topTags(testData, n=Inf) | |
502 topIDs <- top$table[(top$table$FDR < fdrThresh) & | |
503 (abs(top$table$logFC) > lfcThresh), 1] | |
7 | 504 |
2 | 505 write.table(top, file=topOut, row.names=FALSE, sep="\t") |
7 | 506 |
2 | 507 linkName <- paste0("Top Tags Table(", pairData[2], "-", pairData[1], |
508 ") (.tsv)") | |
509 linkAddr <- paste0("toptag(", pairData[2], "-", pairData[1], ").tsv") | |
510 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
511 | |
7 | 512 upCount[1] <- sum(top$table$FDR < fdrThresh & top$table$logFC > lfcThresh) |
513 downCount[1] <- sum(top$table$FDR < fdrThresh & | |
514 top$table$logFC < -lfcThresh) | |
515 flatCount[1] <- sum(top$table$FDR > fdrThresh | | |
516 abs(top$table$logFC) < lfcThresh) | |
517 | |
518 | |
519 | |
2 | 520 # Select hairpins with FDR < 0.05 to highlight on plot |
521 png(smearPng, width=600, height=600) | |
522 plotTitle <- gsub(".", " ", | |
523 paste0("Smear Plot: ", pairData[2], "-", pairData[1]), | |
7 | 524 fixed=TRUE) |
2 | 525 plotSmear(testData, de.tags=topIDs, |
526 pch=20, cex=1.0, main=plotTitle) | |
7 | 527 abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2) |
2 | 528 imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ")") |
529 imgAddr <- paste0("smear(", pairData[2], "-", pairData[1],").png") | |
530 imageData <- rbind(imageData, c(imgName, imgAddr)) | |
531 invisible(dev.off()) | |
532 | |
533 pdf(smearPdf) | |
534 plotTitle <- gsub(".", " ", | |
535 paste0("Smear Plot: ", pairData[2], "-", pairData[1]), | |
7 | 536 fixed=TRUE) |
2 | 537 plotSmear(testData, de.tags=topIDs, |
538 pch=20, cex=1.0, main=plotTitle) | |
7 | 539 abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2) |
2 | 540 imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ") (.pdf)") |
541 imgAddr <- paste0("smear(", pairData[2], "-", pairData[1], ").pdf") | |
542 linkData <- rbind(linkData, c(imgName, imgAddr)) | |
543 invisible(dev.off()) | |
7 | 544 |
2 | 545 } else if (workMode=="glm") { |
546 # Generating design information | |
547 factors <- factor(data$sample$group) | |
548 design <- model.matrix(~0+factors) | |
549 | |
550 colnames(design) <- gsub("factors", "", colnames(design), fixed=TRUE) | |
551 | |
552 # Split up contrasts seperated by comma into a vector | |
553 contrastData <- unlist(strsplit(contrastData, split=",")) | |
7 | 554 |
2 | 555 for (i in 1:length(contrastData)) { |
556 # Generate contrasts information | |
557 contrasts <- makeContrasts(contrasts=contrastData[i], levels=design) | |
558 | |
559 # Fit negative bionomial GLM | |
7 | 560 fit <- glmFit(data, design) |
2 | 561 # Carry out Likelihood ratio test |
7 | 562 testData <- glmLRT(fit, contrast=contrasts) |
2 | 563 |
564 # Select hairpins with FDR < 0.05 to highlight on plot | |
565 top <- topTags(testData, n=Inf) | |
566 topIDs <- top$table[(top$table$FDR < fdrThresh) & | |
567 (abs(top$table$logFC) > lfcThresh), 1] | |
568 write.table(top, file=topOut[i], row.names=FALSE, sep="\t") | |
569 | |
570 linkName <- paste0("Top Tags Table(", contrastData[i], ") (.tsv)") | |
571 linkAddr <- paste0("toptag(", contrastData[i], ").tsv") | |
572 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
573 | |
7 | 574 # Collect counts for differential representation |
575 upCount[i] <- sum(top$table$FDR < fdrThresh & top$table$logFC > lfcThresh) | |
576 downCount[i] <- sum(top$table$FDR < fdrThresh & | |
577 top$table$logFC < -lfcThresh) | |
578 flatCount[i] <- sum(top$table$FDR > fdrThresh | | |
579 abs(top$table$logFC) < lfcThresh) | |
580 | |
2 | 581 # Make a plot of logFC versus logCPM |
582 png(smearPng[i], height=600, width=600) | |
583 plotTitle <- paste("Smear Plot:", gsub(".", " ", contrastData[i], | |
584 fixed=TRUE)) | |
585 plotSmear(testData, de.tags=topIDs, pch=20, cex=0.8, main=plotTitle) | |
586 abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2) | |
587 | |
588 imgName <- paste0("Smear Plot(", contrastData[i], ")") | |
589 imgAddr <- paste0("smear(", contrastData[i], ").png") | |
590 imageData <- rbind(imageData, c(imgName, imgAddr)) | |
591 invisible(dev.off()) | |
592 | |
593 pdf(smearPdf[i]) | |
594 plotTitle <- paste("Smear Plot:", gsub(".", " ", contrastData[i], | |
595 fixed=TRUE)) | |
596 plotSmear(testData, de.tags=topIDs, pch=20, cex=0.8, main=plotTitle) | |
597 abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2) | |
598 | |
599 linkName <- paste0("Smear Plot(", contrastData[i], ") (.pdf)") | |
600 linkAddr <- paste0("smear(", contrastData[i], ").pdf") | |
601 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
602 invisible(dev.off()) | |
603 | |
604 genes <- as.character(data$genes$Gene) | |
605 unq <- unique(genes) | |
606 unq <- unq[!is.na(unq)] | |
607 geneList <- list() | |
608 for (gene in unq) { | |
609 if (length(which(genes==gene)) >= hairpinReq) { | |
610 geneList[[gene]] <- which(genes==gene) | |
611 } | |
612 } | |
613 | |
614 if (wantRoast) { | |
615 # Input preparaton for roast | |
7 | 616 nrot <- 9999 |
2 | 617 set.seed(602214129) |
618 roastData <- mroast(data, index=geneList, design=design, | |
619 contrast=contrasts, nrot=nrot) | |
620 roastData <- cbind(GeneID=rownames(roastData), roastData) | |
621 write.table(roastData, file=roastOut[i], row.names=FALSE, sep="\t") | |
622 linkName <- paste0("Gene Level Analysis Table(", contrastData[i], | |
623 ") (.tsv)") | |
8 | 624 linkAddr <- paste0("gene_level(", contrastData[i], ").tsv") |
2 | 625 linkData <- rbind(linkData, c(linkName, linkAddr)) |
626 if (selectOpt=="rank") { | |
627 selectedGenes <- rownames(roastData)[selectVals] | |
628 } else { | |
629 selectedGenes <- selectVals | |
630 } | |
631 | |
632 if (packageVersion("limma")<"3.19.19") { | |
633 png(barcodePng[i], width=600, height=length(selectedGenes)*150) | |
634 } else { | |
635 png(barcodePng[i], width=600, height=length(selectedGenes)*300) | |
636 } | |
637 par(mfrow=c(length(selectedGenes), 1)) | |
638 for (gene in selectedGenes) { | |
639 barcodeplot(testData$table$logFC, index=geneList[[gene]], | |
640 main=paste("Barcode Plot for", gene, "(logFCs)", | |
641 gsub(".", " ", contrastData[i])), | |
642 labels=c("Positive logFC", "Negative logFC")) | |
643 } | |
644 imgName <- paste0("Barcode Plot(", contrastData[i], ")") | |
645 imgAddr <- paste0("barcode(", contrastData[i], ").png") | |
646 imageData <- rbind(imageData, c(imgName, imgAddr)) | |
647 dev.off() | |
648 if (packageVersion("limma")<"3.19.19") { | |
649 pdf(barcodePdf[i], width=8, height=2) | |
650 } else { | |
651 pdf(barcodePdf[i], width=8, height=4) | |
652 } | |
653 for (gene in selectedGenes) { | |
654 barcodeplot(testData$table$logFC, index=geneList[[gene]], | |
655 main=paste("Barcode Plot for", gene, "(logFCs)", | |
656 gsub(".", " ", contrastData[i])), | |
657 labels=c("Positive logFC", "Negative logFC")) | |
658 } | |
659 linkName <- paste0("Barcode Plot(", contrastData[i], ") (.pdf)") | |
660 linkAddr <- paste0("barcode(", contrastData[i], ").pdf") | |
661 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
662 dev.off() | |
663 } | |
664 } | |
665 } | |
666 | |
8 | 667 # Generate data frame of the significant differences |
7 | 668 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount) |
669 if (workMode == "glm") { | |
670 row.names(sigDiff) <- contrastData | |
671 } else if (workMode == "classic") { | |
672 row.names(sigDiff) <- paste0(pairData[2], "-", pairData[1]) | |
673 } | |
674 | |
8 | 675 # Output table of summarised counts |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
676 ID <- rownames(data$counts) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
677 outputCounts <- cbind(ID, data$counts) |
8 | 678 write.table(outputCounts, file=countsOut, row.names=FALSE, sep="\t", |
679 quote=FALSE) | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
680 linkName <- "Counts table (.tsv)" |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
681 linkAddr <- "counts.tsv" |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
682 linkData <- rbind(linkData, c(linkName, linkAddr)) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
683 |
8 | 684 # Record session info |
685 writeLines(capture.output(sessionInfo()), sessionOut) | |
686 linkData <- rbind(linkData, c("Session Info", "session_info.txt")) | |
687 | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
688 # Record ending time and calculate total run time |
2 | 689 timeEnd <- as.character(Sys.time()) |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
690 timeTaken <- capture.output(round(difftime(timeEnd,timeStart), digits=3)) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
691 timeTaken <- gsub("Time difference of ", "", timeTaken, fixed=TRUE) |
2 | 692 ################################################################################ |
693 ### HTML Generation | |
694 ################################################################################ | |
695 # Clear file | |
696 cat("", file=htmlPath) | |
697 | |
698 cata("<html>\n") | |
699 HtmlHead("EdgeR Output") | |
700 | |
701 cata("<body>\n") | |
702 cata("<h3>EdgeR Analysis Output:</h3>\n") | |
703 cata("<h4>Input Summary:</h4>\n") | |
704 if (inputType=="fastq") { | |
705 cata("<ul>\n") | |
706 ListItem(hpReadout[1]) | |
707 ListItem(hpReadout[2]) | |
708 cata("</ul>\n") | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
709 cata(hpReadout[3], "<br />\n") |
2 | 710 cata("<ul>\n") |
711 ListItem(hpReadout[4]) | |
712 ListItem(hpReadout[7]) | |
713 cata("</ul>\n") | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
714 cata(hpReadout[8:11], sep="<br />\n") |
2 | 715 cata("<br />\n") |
716 cata("<b>Please check that read percentages are consistent with ") | |
717 cata("expectations.</b><br >\n") | |
718 } else if (inputType=="counts") { | |
719 cata("<ul>\n") | |
720 ListItem("Number of Samples: ", ncol(data$counts)) | |
721 ListItem("Number of Hairpins: ", countsRows) | |
722 ListItem("Number of annotations provided: ", annoRows) | |
723 ListItem("Number of annotations matched to hairpin: ", annoMatched) | |
724 cata("</ul>\n") | |
725 } | |
726 | |
727 cata("The estimated common biological coefficient of variation (BCV) is: ", | |
728 commonBCV, "<br />\n") | |
729 | |
730 cata("<h4>Output:</h4>\n") | |
7 | 731 cata("PDF copies of JPEGS available in 'Plots' section.<br />\n") |
2 | 732 for (i in 1:nrow(imageData)) { |
733 if (grepl("barcode", imageData$Link[i])) { | |
734 if (packageVersion("limma")<"3.19.19") { | |
735 HtmlImage(imageData$Link[i], imageData$Label[i], | |
736 height=length(selectedGenes)*150) | |
737 } else { | |
738 HtmlImage(imageData$Link[i], imageData$Label[i], | |
739 height=length(selectedGenes)*300) | |
740 } | |
741 } else { | |
742 HtmlImage(imageData$Link[i], imageData$Label[i]) | |
743 } | |
744 } | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
745 cata("<br />\n") |
2 | 746 |
7 | 747 cata("<h4>Differential Representation Counts:</h4>\n") |
748 | |
749 cata("<table border=\"1\" cellpadding=\"4\">\n") | |
750 cata("<tr>\n") | |
751 TableItem() | |
752 for (i in colnames(sigDiff)) { | |
753 TableHeadItem(i) | |
754 } | |
755 cata("</tr>\n") | |
756 for (i in 1:nrow(sigDiff)) { | |
757 cata("<tr>\n") | |
758 TableHeadItem(unmake.names(row.names(sigDiff)[i])) | |
759 for (j in 1:ncol(sigDiff)) { | |
760 TableItem(as.character(sigDiff[i, j])) | |
761 } | |
762 cata("</tr>\n") | |
763 } | |
764 cata("</table>") | |
765 | |
2 | 766 cata("<h4>Plots:</h4>\n") |
767 for (i in 1:nrow(linkData)) { | |
8 | 768 if (grepl(".pdf", linkData$Link[i])) { |
2 | 769 HtmlLink(linkData$Link[i], linkData$Label[i]) |
770 } | |
771 } | |
772 | |
773 cata("<h4>Tables:</h4>\n") | |
774 for (i in 1:nrow(linkData)) { | |
775 if (grepl(".tsv", linkData$Link[i])) { | |
776 HtmlLink(linkData$Link[i], linkData$Label[i]) | |
777 } | |
778 } | |
779 | |
7 | 780 cata("<p>Alt-click links to download file.</p>\n") |
781 cata("<p>Click floppy disc icon associated history item to download ") | |
782 cata("all files.</p>\n") | |
783 cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n") | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
784 |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
785 cata("<h4>Additional Information:</h4>\n") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
786 |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
787 if (inputType == "fastq") { |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
788 ListItem("Data was gathered from fastq raw read file(s).") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
789 } else if (inputType == "counts") { |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
790 ListItem("Data was gathered from a table of counts.") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
791 } |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
792 |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
793 if (cpmReq!=0 && sampleReq!=0) { |
8 | 794 tempStr <- paste("Hairpins without more than", cpmReq, |
7 | 795 "CPM in at least", sampleReq, "samples are insignificant", |
796 "and filtered out.") | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
797 ListItem(tempStr) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
798 filterProp <- round(filteredCount/preFilterCount*100, digits=2) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
799 tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp, |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
800 "%) hairpins were filtered out for low count-per-million.") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
801 ListItem(tempStr) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
802 } |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
803 |
10
8923d4ea858b
- Added check for zero library size, will now filter out zero library size
shian_su <registertonysu@gmail.com>
parents:
9
diff
changeset
|
804 if (exists("filteredSamples")) { |
8923d4ea858b
- Added check for zero library size, will now filter out zero library size
shian_su <registertonysu@gmail.com>
parents:
9
diff
changeset
|
805 tempStr <- paste("The following samples were filtered out for having zero", |
8923d4ea858b
- Added check for zero library size, will now filter out zero library size
shian_su <registertonysu@gmail.com>
parents:
9
diff
changeset
|
806 "library size: ", filteredSamples) |
8923d4ea858b
- Added check for zero library size, will now filter out zero library size
shian_su <registertonysu@gmail.com>
parents:
9
diff
changeset
|
807 ListItem(tempStr) |
8923d4ea858b
- Added check for zero library size, will now filter out zero library size
shian_su <registertonysu@gmail.com>
parents:
9
diff
changeset
|
808 } |
8923d4ea858b
- Added check for zero library size, will now filter out zero library size
shian_su <registertonysu@gmail.com>
parents:
9
diff
changeset
|
809 |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
810 if (workMode == "classic") { |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
811 ListItem("An exact test was performed on each hairpin.") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
812 } else if (workMode == "glm") { |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
813 ListItem("A generalised linear model was fitted to each hairpin.") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
814 } |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
815 |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
816 cit <- character() |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
817 link <-character() |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
818 link[1] <- paste0("<a href=\"", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
819 "http://www.bioconductor.org/packages/release/bioc/", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
820 "vignettes/limma/inst/doc/usersguide.pdf", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
821 "\">", "limma User's Guide", "</a>.") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
822 link[2] <- paste0("<a href=\"", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
823 "http://www.bioconductor.org/packages/release/bioc/", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
824 "vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
825 "\">", "edgeR User's Guide", "</a>") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
826 |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
827 cit[1] <- paste("Robinson MD, McCarthy DJ and Smyth GK (2010).", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
828 "edgeR: a Bioconductor package for differential", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
829 "expression analysis of digital gene expression", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
830 "data. Bioinformatics 26, 139-140") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
831 cit[2] <- paste("Robinson MD and Smyth GK (2007). Moderated statistical tests", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
832 "for assessing differences in tag abundance. Bioinformatics", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
833 "23, 2881-2887") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
834 cit[3] <- paste("Robinson MD and Smyth GK (2008). Small-sample estimation of", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
835 "negative binomial dispersion, with applications to SAGE data.", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
836 "Biostatistics, 9, 321-332") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
837 |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
838 cit[4] <- paste("McCarthy DJ, Chen Y and Smyth GK (2012). Differential", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
839 "expression analysis of multifactor RNA-Seq experiments with", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
840 "respect to biological variation. Nucleic Acids Research 40,", |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
841 "4288-4297") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
842 |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
843 cata("<h4>Citations</h4>") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
844 cata("<ol>\n") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
845 ListItem(cit[1]) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
846 ListItem(cit[2]) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
847 ListItem(cit[3]) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
848 ListItem(cit[4]) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
849 cata("</ol>\n") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
850 |
8 | 851 cata("<p>Report problems to: su.s@wehi.edu.au</p>\n") |
852 | |
853 for (i in 1:nrow(linkData)) { | |
854 if (grepl("session_info", linkData$Link[i])) { | |
855 HtmlLink(linkData$Link[i], linkData$Label[i]) | |
856 } | |
857 } | |
858 | |
2 | 859 cata("<table border=\"0\">\n") |
860 cata("<tr>\n") | |
861 TableItem("Task started at:"); TableItem(timeStart) | |
862 cata("</tr>\n") | |
863 cata("<tr>\n") | |
864 TableItem("Task ended at:"); TableItem(timeEnd) | |
865 cata("</tr>\n") | |
6
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
866 cata("<tr>\n") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
867 TableItem("Task run time:"); TableItem(timeTaken) |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
868 cata("<tr>\n") |
3d04308a99f9
- Added differentially expressed hairpin count output
shian_su <registertonysu@gmail.com>
parents:
4
diff
changeset
|
869 cata("</table>\n") |
2 | 870 |
871 cata("</body>\n") | |
872 cata("</html>") |