comparison edgeR.pl @ 11:e5fcbabbdea7 draft

Uploaded
author fcaramia
date Wed, 21 Aug 2013 22:13:03 -0400
parents 674c75219f15
children
comparison
equal deleted inserted replaced
10:86292c2b0ba9 11:e5fcbabbdea7
6 use File::Basename; 6 use File::Basename;
7 use File::Path qw(make_path remove_tree); 7 use File::Path qw(make_path remove_tree);
8 $| = 1; 8 $| = 1;
9 9
10 # Grab and set all options 10 # Grab and set all options
11 my %OPTIONS = (a => "glm", d => "tag", f => "BH", p => 0.3, r => 5, u => "movingave"); 11 my %OPTIONS = (a => "glm", d => "tag", f => "BH", r => 5, u => "movingave");
12 12
13 getopts('a:d:e:f:h:lmn:o:p:r:tu:', \%OPTIONS); 13 getopts('a:d:e:f:h:lmn:o:r:tu:', \%OPTIONS);
14 14
15 die qq( 15 die qq(
16 Usage: edgeR.pl [OPTIONS] factor::factor1::levels [factor::factor2::levels ...] cp::cont_pred1::values [cp::cont_pred2::values ...] cnt::contrast1 [cnt::contrast2] matrix 16 Usage: edgeR.pl [OPTIONS] factor::factor1::levels [factor::factor2::levels ...] cp::cont_pred1::values [cp::cont_pred2::values ...] cnt::contrast1 [cnt::contrast2] matrix
17 17
18 OPTIONS: -a STR Type Of Analysis [glm, pw, limma] (default: $OPTIONS{a}) 18 OPTIONS: -a STR Type Of Analysis [glm, pw, limma] (default: $OPTIONS{a})
22 -h STR Name of html file for additional files 22 -h STR Name of html file for additional files
23 -l Output the normalised digital gene expression matrix in log2 format (only applicable when using limma and -n is also specified) 23 -l Output the normalised digital gene expression matrix in log2 format (only applicable when using limma and -n is also specified)
24 -m Perform all pairwise comparisons 24 -m Perform all pairwise comparisons
25 -n STR File name to output the normalised digital gene expression matrix (only applicable when usinf glm or limma model) 25 -n STR File name to output the normalised digital gene expression matrix (only applicable when usinf glm or limma model)
26 -o STR File name to output csv file with results 26 -o STR File name to output csv file with results
27 -p FLT The proportion of all tags/genes to be used for the locally weighted estimation of the tagwise dispersion, ony applicable when 1 factor analysis selected (default: $OPTIONS{p})
28 -r INT Common Dispersion Rowsum Filter, ony applicable when 1 factor analysis selected (default: $OPTIONS{r}) 27 -r INT Common Dispersion Rowsum Filter, ony applicable when 1 factor analysis selected (default: $OPTIONS{r})
29 -t Estimate Tagwise Disp when performing 1 factor analysis 28 -t Estimate Tagwise Disp when performing 1 factor analysis
30 -u STR Method for allowing the prior distribution for the dispersion to be abundance- dependent ["movingave", "tricube", "none"] (default: $OPTIONS{u}) 29 -u STR Method for allowing the prior distribution for the dispersion to be abundance- dependent ["movingave", "tricube", "none"] (default: $OPTIONS{u})
31 30
32 ) if(!@ARGV); 31 ) if(!@ARGV);
33 32
34 my $matrix = pop @ARGV; 33 my $matrix = pop @ARGV;
35 34
36 make_path($OPTIONS{e}); 35 make_path($OPTIONS{e});
37 open(Rcmd,">$OPTIONS{e}/r_script.R") or die "Cannot open $OPTIONS{e}/r_script.R\n\n"; 36 open(Rcmd,">$OPTIONS{e}/r_script.R") or die "Cannot open $OPTIONS{e}/r_script.R\n\n";
38 print Rcmd " 37 print Rcmd "
39 zz <- file(\"$OPTIONS{e}/r_script.err\", open=\"wt\") 38 zz <- file(\"$OPTIONS{e}/r_script.err\", open=\"wt\")
40 sink(zz) 39 sink(zz)
41 sink(zz, type=\"message\") 40 sink(zz, type=\"message\")
42 41
43 library(edgeR) 42 library(edgeR)
44 library(limma) 43 library(limma)
45 44
46 # read in matrix and groups 45 # read in matrix and groups
47 toc <- read.table(\"$matrix\", sep=\"\\t\", comment=\"\", as.is=T) 46 toc <- read.table(\"$matrix\", sep=\"\\t\", comment=\"\", as.is=T)
48 groups <- sapply(toc[1, -1], strsplit, \":\") 47 groups <- sapply(toc[1, -1], strsplit, \":\")
49 for(i in 1:length(groups)) { g <- make.names(groups[[i]][2]); names(groups)[i] <- g; groups[[i]] <- groups[[i]][-2] } 48 for(i in 1:length(groups)) { g <- make.names(groups[[i]][2]); names(groups)[i] <- g; groups[[i]] <- groups[[i]][-2] }
50 colnames(toc) <- make.names(toc[2,]) 49 colnames(toc) <- make.names(toc[2,])
51 toc[,1] <- gsub(\",\", \".\", toc[,1]) 50 toc[,1] <- gsub(\",\", \".\", toc[,1])
52 tagnames <- toc[-(1:2), 1] 51 tagnames <- toc[-(1:2), 1]
53 rownames(toc) <- toc[,1] 52 rownames(toc) <- toc[,1]
54 toc <- toc[-(1:2), -1] 53 toc <- toc[-(1:2), -1]
55 for(i in colnames(toc)) toc[, i] <- as.numeric(toc[,i]) 54 for(i in colnames(toc)) toc[, i] <- as.numeric(toc[,i])
56 norm_factors <- calcNormFactors(as.matrix(toc)) 55 norm_factors <- calcNormFactors(as.matrix(toc))
57 56
58 pw_tests <- list() 57 pw_tests <- list()
59 uniq_groups <- unique(names(groups)) 58 uniq_groups <- unique(names(groups))
60 for(i in 1:(length(uniq_groups)-1)) for(j in (i+1):length(uniq_groups)) pw_tests[[length(pw_tests)+1]] <- c(uniq_groups[i], uniq_groups[j]) 59 for(i in 1:(length(uniq_groups)-1)) for(j in (i+1):length(uniq_groups)) pw_tests[[length(pw_tests)+1]] <- c(uniq_groups[i], uniq_groups[j])
61 DGE <- DGEList(toc, lib.size=norm_factors*colSums(toc), group=names(groups)) 60 DGE <- DGEList(toc, lib.size=norm_factors*colSums(toc), group=names(groups))
62 pdf(\"$OPTIONS{e}/MA_plots_normalisation.pdf\", width=14) 61 pdf(\"$OPTIONS{e}/MA_plots_normalisation.pdf\", width=14)
63 for(i in 1:length(pw_tests)) { 62 for(i in 1:length(pw_tests)) {
64 j <- c(which(names(groups) == pw_tests[[i]][1])[1], which(names(groups) == pw_tests[[i]][2])[1]) 63 j <- c(which(names(groups) == pw_tests[[i]][1])[1], which(names(groups) == pw_tests[[i]][2])[1])
65 par(mfrow = c(1, 2)) 64 par(mfrow = c(1, 2))
66 maPlot(toc[, j[1]], toc[, j[2]], normalize = TRUE, pch = 19, cex = 0.2, ylim = c(-10, 10), main=paste(\"MA Plot\", colnames(toc)[j[1]], \"vs\", colnames(toc)[j[2]])) 65 maPlot(toc[, j[1]], toc[, j[2]], normalize = TRUE, pch = 19, cex = 0.2, ylim = c(-10, 10), main=paste(\"MA Plot\", colnames(toc)[j[1]], \"vs\", colnames(toc)[j[2]]))
67 grid(col = \"blue\") 66 grid(col = \"blue\")
68 abline(h = log2(norm_factors[j[2]]), col = \"red\", lwd = 4) 67 abline(h = log2(norm_factors[j[2]]), col = \"red\", lwd = 4)
69 maPlot(DGE\$counts[, j[1]]/DGE\$samples\$lib.size[j[1]], DGE\$counts[, j[2]]/DGE\$samples\$lib.size[j[2]], normalize = FALSE, pch = 19, cex = 0.2, ylim = c(-8, 8), main=paste(\"MA Plot\", colnames(toc)[j[1]], \"vs\", colnames(toc)[j[2]], \"Normalised\")) 68 maPlot(DGE\$counts[, j[1]]/DGE\$samples\$lib.size[j[1]], DGE\$counts[, j[2]]/DGE\$samples\$lib.size[j[2]], normalize = FALSE, pch = 19, cex = 0.2, ylim = c(-8, 8), main=paste(\"MA Plot\", colnames(toc)[j[1]], \"vs\", colnames(toc)[j[2]], \"Normalised\"))
70 grid(col = \"blue\") 69 grid(col = \"blue\")
71 } 70 }
72 dev.off() 71 dev.off()
73 pdf(file=\"$OPTIONS{e}/MDSplot.pdf\") 72 pdf(file=\"$OPTIONS{e}/MDSplot.pdf\")
74 plotMDS(DGE, main=\"MDS Plot\", col=as.numeric(factor(names(groups)))+1, xlim=c(-3,3)) 73 plotMDS(DGE, main=\"MDS Plot\", col=as.numeric(factor(names(groups)))+1, xlim=c(-3,3))
75 dev.off() 74 dev.off()
76 tested <- list() 75 tested <- list()
77 "; 76 ";
78 77
79 my $all_cont; 78 my $all_cont;
80 my @add_cont; 79 my @add_cont;
81 my @fact; 80 my @fact;
104 } 103 }
105 } 104 }
106 105
107 if($OPTIONS{a} eq "pw") { 106 if($OPTIONS{a} eq "pw") {
108 print Rcmd " 107 print Rcmd "
109 disp <- estimateCommonDisp(DGE, rowsum.filter=$OPTIONS{r}) 108 disp <- estimateCommonDisp(DGE, rowsum.filter=$OPTIONS{r})
110 "; 109 ";
111 if(defined $OPTIONS{t}) { 110 if(defined $OPTIONS{t}) {
112 print Rcmd " 111 print Rcmd "
113 disp <- estimateTagwiseDisp(disp, trend=\"$OPTIONS{u}\", prop.used=$OPTIONS{p}) 112 disp <- estimateTagwiseDisp(disp, trend=\"$OPTIONS{u}\")
114 pdf(file=\"$OPTIONS{e}/Tagwise_Dispersion_vs_Abundance.pdf\") 113 pdf(file=\"$OPTIONS{e}/Tagwise_Dispersion_vs_Abundance.pdf\")
115 plot(log2(1e06*disp\$conc\$conc.common), disp\$tagwise.dispersion, xlab=\"Counts per million (log2 scale)\", ylab=\"Tagwise dispersion\") 114 plotBCV(disp, cex=0.4)
116 abline(h=disp\$common.dispersion, col=\"firebrick\", lwd=3) 115 abline(h=disp\$common.dispersion, col=\"firebrick\", lwd=3)
117 dev.off() 116 dev.off()
118 " 117 ";
119 } 118 }
120 print Rcmd " 119 print Rcmd "
121 for(i in 1:length(pw_tests)) { 120 for(i in 1:length(pw_tests)) {
122 tested[[i]] <- exactTest(disp, pair=pw_tests[[i]]) 121 tested[[i]] <- exactTest(disp, pair=pw_tests[[i]])
123 names(tested)[i] <- paste(pw_tests[[i]][2], \"-\", pw_tests[[i]][1], sep=\"\") 122 names(tested)[i] <- paste(pw_tests[[i]][2], \"-\", pw_tests[[i]][1], sep=\"\")
124 } 123 }
125 pdf(file=\"$OPTIONS{e}/Smear_Plots.pdf\") 124 pdf(file=\"$OPTIONS{e}/Smear_Plots.pdf\")
126 for(i in 1:length(pw_tests)) { 125 for(i in 1:length(pw_tests)) {
127 if(nrow(decideTestsDGE(tested[[i]] , p.value=0.05)) > 0) { 126 dt <- decideTestsDGE(tested[[i]], p.value=0.05, adjust.method=\"$OPTIONS{f}\")
128 de_tags <- rownames(decideTestsDGE(tested[[i]] , p.value=0.05, adjust.method=\"$OPTIONS{f}\")) 127 if(sum(dt) > 0) {
129 ttl <- \"(Diff. Exp. Genes With adj. Pvalue < 0.05 highlighted)\" 128 de_tags <- rownames(disp)[which(dt != 0)]
130 } else { 129 ttl <- \"Diff. Exp. Genes With adj. Pvalue < 0.05\"
131 de_tags <- rownames(topTags(tested[[i]], n=100)\$table) 130 } else {
132 ttl <- \"(Top 100 tags highlighted)\" 131 de_tags <- rownames(topTags(tested[[i]], n=100)\$table)
133 } 132 ttl <- \"Top 100 tags\"
134 133 }
135 plotSmear(disp, pair=pw_tests[[i]], de.tags = de_tags, main = paste(\"FC plot\", ttl)) 134
136 abline(h = c(-2, 2), col = \"dodgerblue\") 135 if(length(dt) < 5000) {
137 } 136 pointcex = 0.5
138 dev.off() 137 } else {
139 "; 138 pointcex = 0.2
139 }
140 plotSmear(disp, pair=pw_tests[[i]], de.tags = de_tags, main = paste(\"Smear Plot\", names(tested)[i]), cex=0.5)
141 abline(h = c(-1, 1), col = \"blue\")
142 legend(\"topright\", c(\"2 Fold Change\", ttl) , lty=c(1, NA), pch=c(NA, 19), pt.cex=0.5, col=c(\"blue\", \"red\"), bty=\"n\")
143 }
144 dev.off()
145 ";
140 } elsif($OPTIONS{a} eq "glm") { 146 } elsif($OPTIONS{a} eq "glm") {
141 for(my $fct = 0; $fct <= $#fact_names; $fct++) { 147 for(my $fct = 0; $fct <= $#fact_names; $fct++) {
142 print Rcmd " 148 print Rcmd "
143 $fact_names[$fct] <- c($fact[$fct]) 149 $fact_names[$fct] <- c($fact[$fct])
144 "; 150 ";
145 } 151 }
146 for(my $fct = 0; $fct <= $#cp_names; $fct++) { 152 for(my $fct = 0; $fct <= $#cp_names; $fct++) {
147 print Rcmd " 153 print Rcmd "
148 $cp_names[$fct] <- c($cp[$fct]) 154 $cp_names[$fct] <- c($cp[$fct])
149 "; 155 ";
150 } 156 }
151 my $all_fact = ""; 157 my $all_fact = "";
152 if(@fact_names) { 158 if(@fact_names) {
153 foreach (@fact_names) { 159 foreach (@fact_names) {
154 $all_fact .= " + factor($_)"; 160 $all_fact .= " + factor($_)";
157 my $all_cp = ""; 163 my $all_cp = "";
158 if(@cp_names) { 164 if(@cp_names) {
159 $all_cp = " + ".join(" + ", @cp_names); 165 $all_cp = " + ".join(" + ", @cp_names);
160 } 166 }
161 print Rcmd " 167 print Rcmd "
162 group_fact <- factor(names(groups)) 168 group_fact <- factor(names(groups))
163 design <- model.matrix(~ -1 + group_fact${all_fact}${all_cp}) 169 design <- model.matrix(~ -1 + group_fact${all_fact}${all_cp})
164 colnames(design) <- sub(\"group_fact\", \"\", colnames(design)) 170 colnames(design) <- sub(\"group_fact\", \"\", colnames(design))
165 "; 171 ";
166 foreach my $fct (@fact_names) { 172 foreach my $fct (@fact_names) {
167 print Rcmd " 173 print Rcmd "
168 colnames(design) <- make.names(sub(\"factor.$fct.\", \"\", colnames(design))) 174 colnames(design) <- make.names(sub(\"factor.$fct.\", \"\", colnames(design)))
169 "; 175 ";
170 } 176 }
171 print Rcmd " 177 print Rcmd "
172 disp <- estimateGLMCommonDisp(DGE, design) 178 disp <- estimateGLMCommonDisp(DGE, design)
173 "; 179 ";
174 if($OPTIONS{d} eq "tag" || $OPTIONS{d} eq "trend") { 180 if($OPTIONS{d} eq "tag" || $OPTIONS{d} eq "trend") {
175 print Rcmd " 181 print Rcmd "
176 disp <- estimateGLMTrendedDisp(disp, design) 182 disp <- estimateGLMTrendedDisp(disp, design)
177 "; 183 ";
178 } 184 }
179 if($OPTIONS{d} eq "tag") { 185 if($OPTIONS{d} eq "tag") {
180 print Rcmd " 186 print Rcmd "
181 disp <- estimateGLMTagwiseDisp(disp, design) 187 disp <- estimateGLMTagwiseDisp(disp, design)
182 fit <- glmFit(disp, design) 188 fit <- glmFit(disp, design)
183 pdf(file=\"$OPTIONS{e}/Tagwise_Dispersion_vs_Abundance.pdf\") 189 pdf(file=\"$OPTIONS{e}/Tagwise_Dispersion_vs_Abundance.pdf\")
184 plot(fit\$abund+log(1e06), sqrt(disp\$tagwise.dispersion), xlab=\"Counts per million (log2 scale)\", ylab=\"Tagwise dispersion\") 190 plotBCV(disp, cex=0.4)
185 oo <- order(disp\$abundance) 191 dev.off()
186 lines(fit\$abundance[oo]+log(1e06), sqrt(disp\$trended.dispersion[oo]), col=\"dodgerblue\", lwd=3) 192 ";
187 abline(h=sqrt(disp\$common.dispersion), col=\"firebrick\", lwd=3)
188 dev.off()
189 ";
190 } 193 }
191 if(@add_cont) { 194 if(@add_cont) {
192 $all_cont = "\"".join("\", \"", @add_cont)."\""; 195 $all_cont = "\"".join("\", \"", @add_cont)."\"";
193 print Rcmd " 196 print Rcmd "
194 cont <- c(${all_cont}) 197 cont <- c(${all_cont})
195 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"([^0-9])\", sep=\"\"), paste(i, \"\\\\1\", sep=\"\"), cont) 198 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"([^0-9])\", sep=\"\"), paste(i, \"\\\\1\", sep=\"\"), cont)
196 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"\$\", sep=\"\"), i, cont) 199 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"\$\", sep=\"\"), i, cont)
197 "; 200 ";
198 } else { 201 } else {
199 print Rcmd " 202 print Rcmd "
200 cont <- NULL 203 cont <- NULL
201 "; 204 ";
202 } 205 }
203 if(defined $OPTIONS{m}) { 206 if(defined $OPTIONS{m}) {
204 print Rcmd " 207 print Rcmd "
205 for(i in 1:length(pw_tests)) cont <- c(cont, paste(pw_tests[[i]][2], \"-\", pw_tests[[i]][1], sep=\"\")) 208 for(i in 1:length(pw_tests)) cont <- c(cont, paste(pw_tests[[i]][2], \"-\", pw_tests[[i]][1], sep=\"\"))
206 "; 209 ";
207 } 210 }
208 if(!defined $OPTIONS{m} && !@add_cont){ 211 if(!defined $OPTIONS{m} && !@add_cont){
209 die("No Contrasts have been specified, you must at least either select multiple pairwise comparisons or specify a custom contrast\n"); 212 die("No Contrasts have been specified, you must at least either select multiple pairwise comparisons or specify a custom contrast\n");
210 } 213 }
211 print Rcmd " 214 print Rcmd "
212 fit <- glmFit(disp, design) 215 fit <- glmFit(disp, design)
213 cont <- makeContrasts(contrasts=cont, levels=design) 216 cont <- makeContrasts(contrasts=cont, levels=design)
214 for(i in colnames(cont)) tested[[i]] <- glmLRT(disp, fit, contrast=cont[,i]) 217 for(i in colnames(cont)) tested[[i]] <- glmLRT(fit, contrast=cont[,i])
215 "; 218 pdf(file=\"$OPTIONS{e}/Smear_Plots.pdf\")
219 for(i in colnames(cont)) {
220 dt <- decideTestsDGE(tested[[i]], p.value=0.05, adjust.method=\"$OPTIONS{f}\")
221 if(sum(dt) > 0) {
222 de_tags <- rownames(disp)[which(dt != 0)]
223 ttl <- \"Diff. Exp. Genes With adj. Pvalue < 0.05\"
224 } else {
225 de_tags <- rownames(topTags(tested[[i]], n=100)\$table)
226 ttl <- \"Top 100 tags\"
227 }
228
229 if(length(dt) < 5000) {
230 pointcex = 0.5
231 } else {
232 pointcex = 0.2
233 }
234 plotSmear(disp, de.tags = de_tags, main = paste(\"Smear Plot\", i), cex=pointcex)
235 abline(h = c(-1, 1), col = \"blue\")
236 legend(\"topright\", c(\"2 Fold Change\", ttl) , lty=c(1, NA), pch=c(NA, 19), pt.cex=0.5, col=c(\"blue\", \"red\"), bty=\"n\")
237 }
238 dev.off()
239 ";
216 if(defined $OPTIONS{n}) { 240 if(defined $OPTIONS{n}) {
217 print Rcmd " 241 print Rcmd "
218 tab <- data.frame(ID=rownames(fit\$fitted.values), fit\$fitted.values, stringsAsFactors=F) 242 tab <- data.frame(ID=rownames(fit\$fitted.values), fit\$fitted.values, stringsAsFactors=F)
219 write.table(tab, \"$OPTIONS{n}\", quote=F, sep=\"\\t\", row.names=F) 243 write.table(tab, \"$OPTIONS{n}\", quote=F, sep=\"\\t\", row.names=F)
220 "; 244 ";
221 } 245 }
222 } elsif($OPTIONS{a} eq "limma") { 246 } elsif($OPTIONS{a} eq "limma") {
223 for(my $fct = 0; $fct <= $#fact_names; $fct++) { 247 for(my $fct = 0; $fct <= $#fact_names; $fct++) {
224 print Rcmd " 248 print Rcmd "
225 $fact_names[$fct] <- c($fact[$fct]) 249 $fact_names[$fct] <- c($fact[$fct])
226 "; 250 ";
227 } 251 }
228 for(my $fct = 0; $fct <= $#cp_names; $fct++) { 252 for(my $fct = 0; $fct <= $#cp_names; $fct++) {
229 print Rcmd " 253 print Rcmd "
230 $cp_names[$fct] <- c($cp[$fct]) 254 $cp_names[$fct] <- c($cp[$fct])
231 "; 255 ";
232 } 256 }
233 my $all_fact = ""; 257 my $all_fact = "";
234 if(@fact_names) { 258 if(@fact_names) {
235 foreach (@fact_names) { 259 foreach (@fact_names) {
236 $all_fact .= " + factor($_)"; 260 $all_fact .= " + factor($_)";
239 my $all_cp = ""; 263 my $all_cp = "";
240 if(@cp_names) { 264 if(@cp_names) {
241 $all_cp = " + ".join(" + ", @cp_names); 265 $all_cp = " + ".join(" + ", @cp_names);
242 } 266 }
243 print Rcmd " 267 print Rcmd "
244 group_fact <- factor(names(groups)) 268 group_fact <- factor(names(groups))
245 design <- model.matrix(~ -1 + group_fact${all_fact}${all_cp}) 269 design <- model.matrix(~ -1 + group_fact${all_fact}${all_cp})
246 colnames(design) <- sub(\"group_fact\", \"\", colnames(design)) 270 colnames(design) <- sub(\"group_fact\", \"\", colnames(design))
247 "; 271 ";
248 foreach my $fct (@fact_names) { 272 foreach my $fct (@fact_names) {
249 print Rcmd " 273 print Rcmd "
250 colnames(design) <- make.names(sub(\"factor.$fct.\", \"\", colnames(design))) 274 colnames(design) <- make.names(sub(\"factor.$fct.\", \"\", colnames(design)))
251 "; 275 ";
252 } 276 }
253 print Rcmd " 277 print Rcmd "
254 isexpr <- rowSums(cpm(toc)>1) >= 2 278 isexpr <- rowSums(cpm(toc)>1) >= 2
255 toc <- toc[isexpr, ] 279 toc <- toc[isexpr, ]
256 pdf(file=\"$OPTIONS{e}/LIMMA_voom.pdf\") 280 pdf(file=\"$OPTIONS{e}/LIMMA_voom.pdf\")
257 y <- voom(toc, design, plot=TRUE, lib.size=colSums(toc)*norm_factors) 281 y <- voom(toc, design, plot=TRUE, lib.size=colSums(toc)*norm_factors)
258 dev.off() 282 dev.off()
259 283
260 pdf(file=\"$OPTIONS{e}/LIMMA_MDS_plot.pdf\") 284 pdf(file=\"$OPTIONS{e}/LIMMA_MDS_plot.pdf\")
261 plotMDS(y, labels=colnames(toc), col=as.numeric(factor(names(groups)))+1, gene.selection=\"common\") 285 plotMDS(y, labels=colnames(toc), col=as.numeric(factor(names(groups)))+1, gene.selection=\"common\")
262 dev.off() 286 dev.off()
263 fit <- lmFit(y, design) 287 fit <- lmFit(y, design)
264 "; 288 ";
265 if(defined $OPTIONS{n}) { 289 if(defined $OPTIONS{n}) {
266 if(defined $OPTIONS{l}) { 290 if(defined $OPTIONS{l}) {
267 print Rcmd " 291 print Rcmd "
268 tab <- data.frame(ID=rownames(y\$E), y\$E, stringsAsFactors=F) 292 tab <- data.frame(ID=rownames(y\$E), y\$E, stringsAsFactors=F)
269 "; 293 ";
270 } else { 294 } else {
271 print Rcmd " 295 print Rcmd "
272 tab <- data.frame(ID=rownames(y\$E), 2^y\$E, stringsAsFactors=F) 296 tab <- data.frame(ID=rownames(y\$E), 2^y\$E, stringsAsFactors=F)
273 "; 297 ";
274 } 298 }
275 print Rcmd " 299 print Rcmd "
276 write.table(tab, \"$OPTIONS{n}\", quote=F, sep=\"\\t\", row.names=F) 300 write.table(tab, \"$OPTIONS{n}\", quote=F, sep=\"\\t\", row.names=F)
277 "; 301 ";
278 } 302 }
279 if(@add_cont) { 303 if(@add_cont) {
280 $all_cont = "\"".join("\", \"", @add_cont)."\""; 304 $all_cont = "\"".join("\", \"", @add_cont)."\"";
281 print Rcmd " 305 print Rcmd "
282 cont <- c(${all_cont}) 306 cont <- c(${all_cont})
283 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"([^0-9])\", sep=\"\"), paste(i, \"\\\\1\", sep=\"\"), cont) 307 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"([^0-9])\", sep=\"\"), paste(i, \"\\\\1\", sep=\"\"), cont)
284 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"\$\", sep=\"\"), i, cont) 308 for(i in uniq_groups) cont <- gsub(paste(groups[[i]], \"\$\", sep=\"\"), i, cont)
285 "; 309 ";
286 } else { 310 } else {
287 print Rcmd " 311 print Rcmd "
288 cont <- NULL 312 cont <- NULL
289 "; 313 ";
290 } 314 }
291 if(defined $OPTIONS{m}) { 315 if(defined $OPTIONS{m}) {
292 print Rcmd " 316 print Rcmd "
293 for(i in 1:length(pw_tests)) cont <- c(cont, paste(pw_tests[[i]][2], \"-\", pw_tests[[i]][1], sep=\"\")) 317 for(i in 1:length(pw_tests)) cont <- c(cont, paste(pw_tests[[i]][2], \"-\", pw_tests[[i]][1], sep=\"\"))
294 "; 318 ";
295 } 319 }
296 if(!defined $OPTIONS{m} && !@add_cont){ 320 if(!defined $OPTIONS{m} && !@add_cont){
297 die("No Contrasts have been specified, you must at least either select multiple pairwise comparisons or specify a custom contrast\n"); 321 die("No Contrasts have been specified, you must at least either select multiple pairwise comparisons or specify a custom contrast\n");
298 } 322 }
299 print Rcmd " 323 print Rcmd "
300 cont <- makeContrasts(contrasts=cont, levels=design) 324 cont <- makeContrasts(contrasts=cont, levels=design)
301 fit2 <- contrasts.fit(fit, cont) 325 fit2 <- contrasts.fit(fit, cont)
302 fit2 <- eBayes(fit2) 326 fit2 <- eBayes(fit2)
303 "; 327 ";
304 } else { 328 } else {
305 die("Anaysis type $OPTIONS{a} not found\n"); 329 die("Anaysis type $OPTIONS{a} not found\n");
306 330
307 } 331 }
308 332
309 if($OPTIONS{a} ne "limma") { 333 if($OPTIONS{a} ne "limma") {
310 print Rcmd " 334 print Rcmd "
311 options(digits = 6) 335 options(digits = 6)
312 tab <- NULL 336 tab <- NULL
313 for(i in names(tested)) { 337 for(i in names(tested)) {
314 tab_tmp <- topTags(tested[[i]], n=Inf, adjust.method=\"$OPTIONS{f}\")[[1]] 338 tab_tmp <- topTags(tested[[i]], n=Inf, adjust.method=\"$OPTIONS{f}\")[[1]]
315 colnames(tab_tmp) <- paste(i, colnames(tab_tmp), sep=\":\") 339 colnames(tab_tmp) <- paste(i, colnames(tab_tmp), sep=\":\")
316 tab_tmp <- tab_tmp[tagnames,] 340 tab_tmp <- tab_tmp[tagnames,]
317 if(is.null(tab)) { 341 if(is.null(tab)) {
318 tab <- tab_tmp 342 tab <- tab_tmp
319 } else tab <- cbind(tab, tab_tmp) 343 } else tab <- cbind(tab, tab_tmp)
320 } 344 }
321 tab <- cbind(Feature=rownames(tab), tab) 345 tab <- cbind(Feature=rownames(tab), tab)
322 "; 346 ";
323 } else { 347 } else {
324 print Rcmd " 348 print Rcmd "
325 tab <- NULL 349 tab <- NULL
326 options(digits = 6) 350 options(digits = 6)
327 for(i in colnames(fit2)) { 351 for(i in colnames(fit2)) {
328 tab_tmp <- topTable(fit2, coef=i, n=Inf, sort.by=\"none\", adjust.method=\"$OPTIONS{f}\") 352 tab_tmp <- topTable(fit2, coef=i, n=Inf, sort.by=\"none\", adjust.method=\"$OPTIONS{f}\")
329 colnames(tab_tmp)[-1] <- paste(i, colnames(tab_tmp)[-1], sep=\":\") 353 colnames(tab_tmp)[-1] <- paste(i, colnames(tab_tmp)[-1], sep=\":\")
330 if(is.null(tab)) { 354 if(is.null(tab)) {
331 tab <- tab_tmp 355 tab <- tab_tmp
332 } else tab <- cbind(tab, tab_tmp[,-1]) 356 } else tab <- cbind(tab, tab_tmp[,-1])
333 } 357 }
334 "; 358 ";
335 } 359 }
336 print Rcmd " 360 print Rcmd "
337 write.table(tab, \"$OPTIONS{o}\", quote=F, sep=\"\\t\", row.names=F) 361 write.table(tab, \"$OPTIONS{o}\", quote=F, sep=\"\\t\", row.names=F)
338 sink(type=\"message\") 362 sink(type=\"message\")
339 sink() 363 sink()
340 "; 364 ";
341 close(Rcmd); 365 close(Rcmd);
342 system("R --no-restore --no-save --no-readline < $OPTIONS{e}/r_script.R > $OPTIONS{e}/r_script.out"); 366 system("R --no-restore --no-save --no-readline < $OPTIONS{e}/r_script.R > $OPTIONS{e}/r_script.out");
343 367
344 open(HTML, ">$OPTIONS{h}"); 368 open(HTML, ">$OPTIONS{h}");
350 print HTML "<li><a href=Tagwise_Dispersion_vs_Abundance.pdf>Tagwise_Dispersion_vs_Abundance.pdf</a></li>\n"; 374 print HTML "<li><a href=Tagwise_Dispersion_vs_Abundance.pdf>Tagwise_Dispersion_vs_Abundance.pdf</a></li>\n";
351 } 375 }
352 print HTML "<li><a href=Smear_Plots.pdf>Smear_Plots.pdf</a></li>\n"; 376 print HTML "<li><a href=Smear_Plots.pdf>Smear_Plots.pdf</a></li>\n";
353 } elsif($OPTIONS{a} eq "glm" && $OPTIONS{d} eq "tag") { 377 } elsif($OPTIONS{a} eq "glm" && $OPTIONS{d} eq "tag") {
354 print HTML "<li><a href=Tagwise_Dispersion_vs_Abundance.pdf>Tagwise_Dispersion_vs_Abundance.pdf</a></li>\n"; 378 print HTML "<li><a href=Tagwise_Dispersion_vs_Abundance.pdf>Tagwise_Dispersion_vs_Abundance.pdf</a></li>\n";
379 print HTML "<li><a href=Smear_Plots.pdf>Smear_Plots.pdf</a></li>\n";
380 } elsif($OPTIONS{a} eq "glm" && ($OPTIONS{d} eq "trend" || $OPTIONS{d} eq "common")) {
381 print HTML "<li><a href=Smear_Plots.pdf>Smear_Plots.pdf</a></li>\n";
355 } elsif($OPTIONS{a} eq "limma") { 382 } elsif($OPTIONS{a} eq "limma") {
356 print HTML "<li><a href=LIMMA_MDS_plot.pdf>LIMMA_MDS_plot.pdf</a></li>\n"; 383 print HTML "<li><a href=LIMMA_MDS_plot.pdf>LIMMA_MDS_plot.pdf</a></li>\n";
357 print HTML "<li><a href=LIMMA_voom.pdf>LIMMA_voom.pdf</a></li>\n"; 384 print HTML "<li><a href=LIMMA_voom.pdf>LIMMA_voom.pdf</a></li>\n";
358 } 385 }
359 print HTML "<li><a href=r_script.R>r_script.R</a></li>\n"; 386 print HTML "<li><a href=r_script.R>r_script.R</a></li>\n";