quantp: quantp.r comparison

comparison quantp.r @ 2:ed0bb50d7ffe draft

planemo upload commit bd6bc95760db6832c77d4d2872281772c31f9039

author	galaxyp
date	Wed, 09 Jan 2019 16:59:24 -0500
parents	bcc7a4c4cc29
children

comparison

equal deleted inserted replaced

-:bcc7a4c4cc29
+:ed0bb50d7ffe
 par(mfrow=c(1,1));
 plot(regmodel, 1, cex.lab=1.5);
 dev.off();
 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[1]] +
 geom_point(aes(text=sprintf("Residual: %.2f<br>Fitted value: %.2f<br>Gene: %s", .fitted, .resid, PE_TE_data$PE_ID)),
 shape = 1, size = .1, stroke = .2) +
 theme_light())
 saveWidget(ggplotly(g, tooltip= c("text")), file.path(gsub("\\.png", "\\.html", outplot)))
 outplot = paste(outdir,"/PE_TE_lm_2.png",sep="",collapse="");
 png(outplot,width = 10, height = 10, units = 'in', res=300);
 # bitmap(outplot, "png16m");
 g <- plot(regmodel, 2, cex.lab=1.5);
 ggplotly(g)
 dev.off();
 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[2]] +
 geom_point(aes(text=sprintf("Standarized residual: %.2f<br>Theoretical quantile: %.2f<br>Gene: %s", .qqx, .qqy, PE_TE_data$PE_ID)),
 shape = 1, size = .1) +
 theme_light())
 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot)))
 outplot = paste(outdir,"/PE_TE_lm_5.png",sep="",collapse="");
 png(outplot, width = 10, height = 10, units = 'in',res=300);
 # cd_cont_pos: returns sqrt(level * p * (1 - h) / h), where h is `leverage`
 # and p is the number of coefficients of `model` (length(coef(model))).
 # Vectorised over `leverage` and `level` through ordinary arithmetic.
 # NOTE(review): presumably the standardized-residual contour on which Cook's
 # distance equals `level`; confirm against the call site (not visible here).
 cd_cont_pos <- function(leverage, level, model) {
   n_coef <- length(coef(model))
   sqrt(level * n_coef * (1 - leverage) / leverage)
 }
 # cd_cont_neg: mirror image of cd_cont_pos (same magnitude, negative sign).
 cd_cont_neg <- function(leverage, level, model) {
   upper <- cd_cont_pos(leverage, level, model)
   -upper
 }
 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[4]] +
 aes(label = PE_TE_data$PE_ID) +
 geom_point(aes(text=sprintf("Leverage: %.2f<br>Standardized residual: %.2f<br>Gene: %s", .hat, .stdresid, PE_TE_data$PE_ID))) +
 theme_light())
 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot)))
 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE);
 cat(
 cooksd_df$colors <- "black"
 cutoff <- as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T)
 cooksd_df[cooksd_df$cooksd > cutoff,]$colors <- "red"
 g <- ggplot(cooksd_df, aes(x = index, y = cooksd, label = row.names(cooksd_df), color=as.factor(colors),
 text=sprintf("Gene: %s<br>Cook's Distance: %.3f", row.names(cooksd_df), cooksd))) +
 ggtitle("Influential Obs. by Cook's distance") + xlab("Observations") + ylab("Cook's Distance") +
 #xlim(0, 3000) + ylim(0, .15) +
 scale_shape_discrete(solid=F) +
 geom_point(size = 2, shape = 8)  +
 geom_hline(yintercept = cutoff,
 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
 png(outplot, width = 10, height = 10, units = 'in', res=300);
 # bitmap(outplot,"png16m");
 suppressWarnings(g <- ggplot(PE_TE_data_no_outlier, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() +
 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") +
 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) +
 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f",
 PE_ID, TE_abundance, PE_abundance))))
 suppressMessages(plot(g))
 suppressMessages(saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot))))
 dev.off();
 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="orange", pch=16);
 dev.off();
 # Interactive plot for k-means clustering
 g <- ggplot(PE_TE_data, aes(x = TE_abundance, y = PE_abundance, label = row.names(PE_TE_data),
 text=sprintf("Gene: %s<br>Transcript Abundance: %.3f<br>Protein Abundance: %.3f",
 PE_ID, TE_abundance, PE_abundance),
 color=as.factor(k1$cluster))) +
 xlab("Transcript Abundance") + ylab("Protein Abundance") +
 scale_shape_discrete(solid=F) + geom_smooth(method = "loess", span = 2/3) +
 geom_point(size = 1, shape = 8) +
 theme_light() + theme(legend.position="none")
 saveWidget(ggplotly(g, tooltip=c("text")), file.path(gsub("\\.png", "\\.html", outplot)))
 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
 png(outfile, width = 10, height = 10, units = 'in', res=300);
 # bitmap(outfile, "png16m");
 suppressWarnings(g <- ggplot(PE_TE_data, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() +
 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") +
 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) +
 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f",
 PE_ID, TE_abundance, PE_abundance)),
 size = .5))
 suppressMessages(plot(g))
 suppressMessages(saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outfile))))
 dev.off();
 }
 abline(v = log(2,base=2), col="red", lty=2)
 abline(v = log(0.5,base=2), col="red", lty=2)
 dev.off();
 g <- ggplot(PE_df_logfold, aes(x = LogFold, -log10(PE_pval), color = as.factor(color),
 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f",
 Genes, LogFold, -log10(PE_pval), PE_pval))) +
 xlab("log2 fold change") + ylab("-log10 p-value") +
 geom_point(shape=1, size = 1.5, stroke = .2) +
 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) +
 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") +
 geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") +
 abline(v = log(2,base=2), col="red", lty=2)
 abline(v = log(0.5,base=2), col="red", lty=2)
 dev.off();
 g <- ggplot(TE_df_logfold, aes(x = LogFold, -log10(TE_pval), color = as.factor(color),
 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f",
 Genes, LogFold, -log10(TE_pval), TE_pval))) +
 xlab("log2 fold change") + ylab("-log10 p-value") +
 geom_point(shape=1, size = 1.5, stroke = .2) +
 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) +
 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") +
 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n',
 file = htmloutfile, append = TRUE);
 # TE Boxplot
 outplot = paste(outdir,"/Box_TE.png",sep="",collape="");
+multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance data");
+lines <- extractWidgetCode(outplot)
+prescripts <- c(prescripts, lines$prescripts)
+postscripts <- c(postscripts, lines$postscripts)
 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n',
 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
-"<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE);
+"<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500>', lines$widget_div, '</td>\n', file = htmloutfile, append = TRUE);
-multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance data");
 # PE Boxplot
 outplot = paste(outdir,"/Box_PE.png",sep="",collape="");
-cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE);
 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance data");
+lines <- extractWidgetCode(outplot)
+postscripts <- c(postscripts, lines$postscripts)
+cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500>', lines$widget_div,
+'</td></tr></table>\n', file = htmloutfile, append = TRUE);
 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n',
 file = htmloutfile, append = TRUE);
 # TE PE scatter
+PE_TE_data = data.frame(PE_df, TE_df);
+colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance");
 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape="");
 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE);
 singlesample_scatter(PE_TE_data, outplot);
 lines <- extractWidgetCode(outplot);
 postscripts <- c(postscripts, lines$postscripts);
-cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800>', lines$widget_div, '</td></tr>\n', file = htmloutfile, append = TRUE);
+cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800>', gsub('width:500px;height:500px', 'width:800px;height:800px' , lines$widget_div), '</td></tr>\n', file = htmloutfile, append = TRUE);
-PE_TE_data = data.frame(PE_df, TE_df);
-colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance");
 # TE PE Cor
 cat("<tr><td align=center>", file = htmloutfile, append = TRUE);
 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE);
 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n',
 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE);
 postscripts <- c(postscripts, c(extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$postscripts,
 extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$postscripts,
 extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$postscripts,
 extractWidgetCode(paste(outdir,"/PE_TE_lm_cooksd.png",sep="",collapse=""))$postscripts,
-extractWidgetCode(paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""))$postscripts));
+extractWidgetCode(paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""))$postscripts,
+gsub('data-for="html', 'data-for="secondhtml"',
+extractWidgetCode(paste(outdir,"/TE_PE_scatter.png",sep="",collapse=""))$postscripts)))
 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n',
 file = htmloutfile, append = TRUE);
 # TE PE Heatmap

Mercurial > repos > galaxyp > quantp

comparison quantp.r @ 2:ed0bb50d7ffe draft