comparison quantp.r @ 2:ed0bb50d7ffe draft

planemo upload commit bd6bc95760db6832c77d4d2872281772c31f9039
author galaxyp
date Wed, 09 Jan 2019 16:59:24 -0500
parents bcc7a4c4cc29
children
comparison
equal deleted inserted replaced
1:bcc7a4c4cc29 2:ed0bb50d7ffe
58 par(mfrow=c(1,1)); 58 par(mfrow=c(1,1));
59 plot(regmodel, 1, cex.lab=1.5); 59 plot(regmodel, 1, cex.lab=1.5);
60 dev.off(); 60 dev.off();
61 61
62 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[1]] + 62 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[1]] +
63 geom_point(aes(text=sprintf("Residual: %.2f<br>Fitted value: %.2f<br>Gene: %s", .fitted, .resid, PE_TE_data$PE_ID)), 63 geom_point(aes(text=sprintf("Residual: %.2f<br>Fitted value: %.2f<br>Gene: %s", .fitted, .resid, PE_TE_data$PE_ID)),
64 shape = 1, size = .1, stroke = .2) + 64 shape = 1, size = .1, stroke = .2) +
65 theme_light()) 65 theme_light())
66 saveWidget(ggplotly(g, tooltip= c("text")), file.path(gsub("\\.png", "\\.html", outplot))) 66 saveWidget(ggplotly(g, tooltip= c("text")), file.path(gsub("\\.png", "\\.html", outplot)))
67 67
68 outplot = paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""); 68 outplot = paste(outdir,"/PE_TE_lm_2.png",sep="",collapse="");
69 png(outplot,width = 10, height = 10, units = 'in', res=300); 69 png(outplot,width = 10, height = 10, units = 'in', res=300);
70 # bitmap(outplot, "png16m"); 70 # bitmap(outplot, "png16m");
72 g <- plot(regmodel, 2, cex.lab=1.5); 72 g <- plot(regmodel, 2, cex.lab=1.5);
73 ggplotly(g) 73 ggplotly(g)
74 dev.off(); 74 dev.off();
75 75
76 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[2]] + 76 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[2]] +
77 geom_point(aes(text=sprintf("Standarized residual: %.2f<br>Theoretical quantile: %.2f<br>Gene: %s", .qqx, .qqy, PE_TE_data$PE_ID)), 77 geom_point(aes(text=sprintf("Standarized residual: %.2f<br>Theoretical quantile: %.2f<br>Gene: %s", .qqx, .qqy, PE_TE_data$PE_ID)),
78 shape = 1, size = .1) + 78 shape = 1, size = .1) +
79 theme_light()) 79 theme_light())
80 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot))) 80 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot)))
81 81
82 82
83 outplot = paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""); 83 outplot = paste(outdir,"/PE_TE_lm_5.png",sep="",collapse="");
84 png(outplot, width = 10, height = 10, units = 'in',res=300); 84 png(outplot, width = 10, height = 10, units = 'in',res=300);
89 89
90 cd_cont_pos <- function(leverage, level, model) {sqrt(level*length(coef(model))*(1-leverage)/leverage)} 90 cd_cont_pos <- function(leverage, level, model) {sqrt(level*length(coef(model))*(1-leverage)/leverage)}
91 cd_cont_neg <- function(leverage, level, model) {-cd_cont_pos(leverage, level, model)} 91 cd_cont_neg <- function(leverage, level, model) {-cd_cont_pos(leverage, level, model)}
92 92
93 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[4]] + 93 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[4]] +
94 aes(label = PE_TE_data$PE_ID) + 94 aes(label = PE_TE_data$PE_ID) +
95 geom_point(aes(text=sprintf("Leverage: %.2f<br>Standardized residual: %.2f<br>Gene: %s", .hat, .stdresid, PE_TE_data$PE_ID))) + 95 geom_point(aes(text=sprintf("Leverage: %.2f<br>Standardized residual: %.2f<br>Gene: %s", .hat, .stdresid, PE_TE_data$PE_ID))) +
96 theme_light()) 96 theme_light())
97 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot))) 97 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot)))
98 98
99 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE); 99 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE);
100 100
101 cat( 101 cat(
213 cooksd_df$colors <- "black" 213 cooksd_df$colors <- "black"
214 cutoff <- as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T) 214 cutoff <- as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T)
215 cooksd_df[cooksd_df$cooksd > cutoff,]$colors <- "red" 215 cooksd_df[cooksd_df$cooksd > cutoff,]$colors <- "red"
216 216
217 g <- ggplot(cooksd_df, aes(x = index, y = cooksd, label = row.names(cooksd_df), color=as.factor(colors), 217 g <- ggplot(cooksd_df, aes(x = index, y = cooksd, label = row.names(cooksd_df), color=as.factor(colors),
218 text=sprintf("Gene: %s<br>Cook's Distance: %.3f", row.names(cooksd_df), cooksd))) + 218 text=sprintf("Gene: %s<br>Cook's Distance: %.3f", row.names(cooksd_df), cooksd))) +
219 ggtitle("Influential Obs. by Cook's distance") + xlab("Observations") + ylab("Cook's Distance") + 219 ggtitle("Influential Obs. by Cook's distance") + xlab("Observations") + ylab("Cook's Distance") +
220 #xlim(0, 3000) + ylim(0, .15) + 220 #xlim(0, 3000) + ylim(0, .15) +
221 scale_shape_discrete(solid=F) + 221 scale_shape_discrete(solid=F) +
222 geom_point(size = 2, shape = 8) + 222 geom_point(size = 2, shape = 8) +
223 geom_hline(yintercept = cutoff, 223 geom_hline(yintercept = cutoff,
273 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); 273 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
274 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); 274 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
275 png(outplot, width = 10, height = 10, units = 'in', res=300); 275 png(outplot, width = 10, height = 10, units = 'in', res=300);
276 # bitmap(outplot,"png16m"); 276 # bitmap(outplot,"png16m");
277 suppressWarnings(g <- ggplot(PE_TE_data_no_outlier, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() + 277 suppressWarnings(g <- ggplot(PE_TE_data_no_outlier, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() +
278 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + 278 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") +
279 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) + 279 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) +
280 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f", 280 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f",
281 PE_ID, TE_abundance, PE_abundance)))) 281 PE_ID, TE_abundance, PE_abundance))))
282 suppressMessages(plot(g)) 282 suppressMessages(plot(g))
283 suppressMessages(saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot)))) 283 suppressMessages(saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot))))
284 dev.off(); 284 dev.off();
285 285
286 286
438 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="orange", pch=16); 438 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="orange", pch=16);
439 dev.off(); 439 dev.off();
440 440
441 # Interactive plot for k-means clustering 441 # Interactive plot for k-means clustering
442 g <- ggplot(PE_TE_data, aes(x = TE_abundance, y = PE_abundance, label = row.names(PE_TE_data), 442 g <- ggplot(PE_TE_data, aes(x = TE_abundance, y = PE_abundance, label = row.names(PE_TE_data),
443 text=sprintf("Gene: %s<br>Transcript Abundance: %.3f<br>Protein Abundance: %.3f", 443 text=sprintf("Gene: %s<br>Transcript Abundance: %.3f<br>Protein Abundance: %.3f",
444 PE_ID, TE_abundance, PE_abundance), 444 PE_ID, TE_abundance, PE_abundance),
445 color=as.factor(k1$cluster))) + 445 color=as.factor(k1$cluster))) +
446 xlab("Transcript Abundance") + ylab("Protein Abundance") + 446 xlab("Transcript Abundance") + ylab("Protein Abundance") +
447 scale_shape_discrete(solid=F) + geom_smooth(method = "loess", span = 2/3) + 447 scale_shape_discrete(solid=F) + geom_smooth(method = "loess", span = 2/3) +
448 geom_point(size = 1, shape = 8) + 448 geom_point(size = 1, shape = 8) +
449 theme_light() + theme(legend.position="none") 449 theme_light() + theme(legend.position="none")
450 saveWidget(ggplotly(g, tooltip=c("text")), file.path(gsub("\\.png", "\\.html", outplot))) 450 saveWidget(ggplotly(g, tooltip=c("text")), file.path(gsub("\\.png", "\\.html", outplot)))
473 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); 473 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
474 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); 474 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
475 png(outfile, width = 10, height = 10, units = 'in', res=300); 475 png(outfile, width = 10, height = 10, units = 'in', res=300);
476 # bitmap(outfile, "png16m"); 476 # bitmap(outfile, "png16m");
477 suppressWarnings(g <- ggplot(PE_TE_data, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() + 477 suppressWarnings(g <- ggplot(PE_TE_data, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() +
478 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + 478 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") +
479 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) + 479 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) +
480 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f", 480 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f",
481 PE_ID, TE_abundance, PE_abundance)), 481 PE_ID, TE_abundance, PE_abundance)),
482 size = .5)) 482 size = .5))
483 suppressMessages(plot(g)) 483 suppressMessages(plot(g))
484 suppressMessages(saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outfile)))) 484 suppressMessages(saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outfile))))
485 dev.off(); 485 dev.off();
486 } 486 }
487 487
680 abline(v = log(2,base=2), col="red", lty=2) 680 abline(v = log(2,base=2), col="red", lty=2)
681 abline(v = log(0.5,base=2), col="red", lty=2) 681 abline(v = log(0.5,base=2), col="red", lty=2)
682 dev.off(); 682 dev.off();
683 683
684 g <- ggplot(PE_df_logfold, aes(x = LogFold, -log10(PE_pval), color = as.factor(color), 684 g <- ggplot(PE_df_logfold, aes(x = LogFold, -log10(PE_pval), color = as.factor(color),
685 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f", 685 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f",
686 Genes, LogFold, -log10(PE_pval), PE_pval))) + 686 Genes, LogFold, -log10(PE_pval), PE_pval))) +
687 xlab("log2 fold change") + ylab("-log10 p-value") + 687 xlab("log2 fold change") + ylab("-log10 p-value") +
688 geom_point(shape=1, size = 1.5, stroke = .2) + 688 geom_point(shape=1, size = 1.5, stroke = .2) +
689 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) + 689 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) +
690 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") + 690 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") +
691 geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") + 691 geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") +
720 abline(v = log(2,base=2), col="red", lty=2) 720 abline(v = log(2,base=2), col="red", lty=2)
721 abline(v = log(0.5,base=2), col="red", lty=2) 721 abline(v = log(0.5,base=2), col="red", lty=2)
722 dev.off(); 722 dev.off();
723 723
724 g <- ggplot(TE_df_logfold, aes(x = LogFold, -log10(TE_pval), color = as.factor(color), 724 g <- ggplot(TE_df_logfold, aes(x = LogFold, -log10(TE_pval), color = as.factor(color),
725 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f", 725 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f",
726 Genes, LogFold, -log10(TE_pval), TE_pval))) + 726 Genes, LogFold, -log10(TE_pval), TE_pval))) +
727 xlab("log2 fold change") + ylab("-log10 p-value") + 727 xlab("log2 fold change") + ylab("-log10 p-value") +
728 geom_point(shape=1, size = 1.5, stroke = .2) + 728 geom_point(shape=1, size = 1.5, stroke = .2) +
729 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) + 729 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) +
730 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") + 730 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") +
972 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n', 972 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n',
973 file = htmloutfile, append = TRUE); 973 file = htmloutfile, append = TRUE);
974 974
975 # TE Boxplot 975 # TE Boxplot
976 outplot = paste(outdir,"/Box_TE.png",sep="",collape=""); 976 outplot = paste(outdir,"/Box_TE.png",sep="",collape="");
977 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance data");
978 lines <- extractWidgetCode(outplot)
979 prescripts <- c(prescripts, lines$prescripts)
980 postscripts <- c(postscripts, lines$postscripts)
977 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n', 981 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n',
978 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', 982 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
979 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE); 983 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500>', lines$widget_div, '</td>\n', file = htmloutfile, append = TRUE);
980 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance data");
981 984
982 # PE Boxplot 985 # PE Boxplot
983 outplot = paste(outdir,"/Box_PE.png",sep="",collape=""); 986 outplot = paste(outdir,"/Box_PE.png",sep="",collape="");
984 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE);
985 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance data"); 987 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance data");
986 988 lines <- extractWidgetCode(outplot)
989 postscripts <- c(postscripts, lines$postscripts)
990 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500>', lines$widget_div,
991 '</td></tr></table>\n', file = htmloutfile, append = TRUE);
987 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n', 992 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n',
988 file = htmloutfile, append = TRUE); 993 file = htmloutfile, append = TRUE);
989 994
990 # TE PE scatter 995 # TE PE scatter
996 PE_TE_data = data.frame(PE_df, TE_df);
997 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance");
991 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape=""); 998 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape="");
992 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE); 999 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE);
993 singlesample_scatter(PE_TE_data, outplot); 1000 singlesample_scatter(PE_TE_data, outplot);
994 lines <- extractWidgetCode(outplot); 1001 lines <- extractWidgetCode(outplot);
995 postscripts <- c(postscripts, lines$postscripts); 1002 postscripts <- c(postscripts, lines$postscripts);
996 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800>', lines$widget_div, '</td></tr>\n', file = htmloutfile, append = TRUE); 1003 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800>', gsub('width:500px;height:500px', 'width:800px;height:800px' , lines$widget_div), '</td></tr>\n', file = htmloutfile, append = TRUE);
997 PE_TE_data = data.frame(PE_df, TE_df);
998 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance");
999 1004
1000 # TE PE Cor 1005 # TE PE Cor
1001 cat("<tr><td align=center>", file = htmloutfile, append = TRUE); 1006 cat("<tr><td align=center>", file = htmloutfile, append = TRUE);
1002 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); 1007 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE);
1003 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n', 1008 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n',
1012 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE); 1017 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE);
1013 postscripts <- c(postscripts, c(extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$postscripts, 1018 postscripts <- c(postscripts, c(extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$postscripts,
1014 extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$postscripts, 1019 extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$postscripts,
1015 extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$postscripts, 1020 extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$postscripts,
1016 extractWidgetCode(paste(outdir,"/PE_TE_lm_cooksd.png",sep="",collapse=""))$postscripts, 1021 extractWidgetCode(paste(outdir,"/PE_TE_lm_cooksd.png",sep="",collapse=""))$postscripts,
1017 extractWidgetCode(paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""))$postscripts)); 1022 extractWidgetCode(paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""))$postscripts,
1023 gsub('data-for="html', 'data-for="secondhtml"',
1024 extractWidgetCode(paste(outdir,"/TE_PE_scatter.png",sep="",collapse=""))$postscripts)))
1018 1025
1019 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n', 1026 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n',
1020 file = htmloutfile, append = TRUE); 1027 file = htmloutfile, append = TRUE);
1021 1028
1022 # TE PE Heatmap 1029 # TE PE Heatmap