Mercurial > repos > galaxyp > quantp
comparison quantp.r @ 1:bcc7a4c4cc29 draft
planemo upload commit 1887dff812162880d66b003a927867cd5000c98f
author | galaxyp |
---|---|
date | Thu, 20 Dec 2018 16:06:05 -0500 |
parents | 75faf9a89f5b |
children | ed0bb50d7ffe |
comparison
equal
deleted
inserted
replaced
0:75faf9a89f5b | 1:bcc7a4c4cc29 |
---|---|
14 tempdf[is.na(tempdf)] = 0; | 14 tempdf[is.na(tempdf)] = 0; |
15 tempdf$Group = tempgrp; | 15 tempdf$Group = tempgrp; |
16 png(outfile, width = 6, height = 6, units = 'in', res=300); | 16 png(outfile, width = 6, height = 6, units = 'in', res=300); |
17 # bitmap(outfile, "png16m"); | 17 # bitmap(outfile, "png16m"); |
18 g = autoplot(prcomp(select(tempdf, -Group)), data = tempdf, colour = 'Group', size=3); | 18 g = autoplot(prcomp(select(tempdf, -Group)), data = tempdf, colour = 'Group', size=3); |
19 saveWidget(ggplotly(g), file.path(gsub("\\.png", "\\.html", outplot))) | |
19 plot(g); | 20 plot(g); |
20 dev.off(); | 21 dev.off(); |
21 } | 22 } |
22 | 23 |
23 #=============================================================================== | 24 #=============================================================================== |
28 rownames(PE_TE_data) = PE_TE_data$PE_ID; | 29 rownames(PE_TE_data) = PE_TE_data$PE_ID; |
29 regmodel = lm(PE_abundance~TE_abundance, data=PE_TE_data); | 30 regmodel = lm(PE_abundance~TE_abundance, data=PE_TE_data); |
30 regmodel_summary = summary(regmodel); | 31 regmodel_summary = summary(regmodel); |
31 | 32 |
32 cat("<font><h3>Linear Regression model fit between Proteome and Transcriptome data</h3></font>\n", | 33 cat("<font><h3>Linear Regression model fit between Proteome and Transcriptome data</h3></font>\n", |
33 "<p>Assuming a linear relationship between Proteome and Transcriptome data, we here fit a linear regression model.</p>\n", | 34 "<p>Assuming a linear relationship between Proteome and Transcriptome data, we here fit a linear regression model.</p>\n", |
34 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n', | 35 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n', |
35 file = htmloutfile, append = TRUE); | 36 file = htmloutfile, append = TRUE); |
36 | 37 |
37 cat("<tr><td>Formula</td><td>","PE_abundance~TE_abundance","</td></tr>\n", | 38 cat("<tr><td>Formula</td><td>","PE_abundance~TE_abundance","</td></tr>\n", |
38 "<tr><td colspan='2' align='center'> <b>Coefficients</b></td>","</tr>\n", | 39 "<tr><td colspan='2' align='center'> <b>Coefficients</b></td>","</tr>\n", |
39 "<tr><td>",names(regmodel$coefficients[1]),"</td><td>",regmodel$coefficients[1]," (Pvalue:", regmodel_summary$coefficients[1,4],")","</td></tr>\n", | 40 "<tr><td>",names(regmodel$coefficients[1]),"</td><td>",regmodel$coefficients[1]," (Pvalue:", regmodel_summary$coefficients[1,4],")","</td></tr>\n", |
40 "<tr><td>",names(regmodel$coefficients[2]),"</td><td>",regmodel$coefficients[2]," (Pvalue:", regmodel_summary$coefficients[2,4],")","</td></tr>\n", | 41 "<tr><td>",names(regmodel$coefficients[2]),"</td><td>",regmodel$coefficients[2]," (Pvalue:", regmodel_summary$coefficients[2,4],")","</td></tr>\n", |
41 "<tr><td colspan='2' align='center'> <b>Model parameters</b></td>","</tr>\n", | 42 "<tr><td colspan='2' align='center'> <b>Model parameters</b></td>","</tr>\n", |
42 "<tr><td>Residual standard error</td><td>",regmodel_summary$sigma," (",regmodel_summary$df[2]," degree of freedom)</td></tr>\n", | 43 "<tr><td>Residual standard error</td><td>",regmodel_summary$sigma," (",regmodel_summary$df[2]," degree of freedom)</td></tr>\n", |
43 "<tr><td>F-statistic</td><td>",regmodel_summary$fstatistic[1]," ( on ",regmodel_summary$fstatistic[2]," and ",regmodel_summary$fstatistic[3]," degree of freedom)</td></tr>\n", | 44 "<tr><td>F-statistic</td><td>",regmodel_summary$fstatistic[1]," ( on ",regmodel_summary$fstatistic[2]," and ",regmodel_summary$fstatistic[3]," degree of freedom)</td></tr>\n", |
44 "<tr><td>R-squared</td><td>",regmodel_summary$r.squared,"</td></tr>\n", | 45 "<tr><td>R-squared</td><td>",regmodel_summary$r.squared,"</td></tr>\n", |
45 "<tr><td>Adjusted R-squared</td><td>",regmodel_summary$adj.r.squared,"</td></tr>\n", | 46 "<tr><td>Adjusted R-squared</td><td>",regmodel_summary$adj.r.squared,"</td></tr>\n", |
46 file = htmloutfile, append = TRUE); | 47 file = htmloutfile, append = TRUE); |
47 | 48 |
48 cat("</table>\n", file = htmloutfile, append = TRUE); | 49 cat("</table>\n", file = htmloutfile, append = TRUE); |
49 | 50 |
50 cat( | 51 cat( |
51 "<font color='#ff0000'><h3>Regression and diagnostics plots</h3></font>\n", | 52 "<font color='#ff0000'><h3>Regression and diagnostics plots</h3></font>\n", |
56 # bitmap(outplot, "png16m"); | 57 # bitmap(outplot, "png16m"); |
57 par(mfrow=c(1,1)); | 58 par(mfrow=c(1,1)); |
58 plot(regmodel, 1, cex.lab=1.5); | 59 plot(regmodel, 1, cex.lab=1.5); |
59 dev.off(); | 60 dev.off(); |
60 | 61 |
62 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[1]] + | |
63 geom_point(aes(text=sprintf("Residual: %.2f<br>Fitted value: %.2f<br>Gene: %s", .fitted, .resid, PE_TE_data$PE_ID)), | |
64 shape = 1, size = .1, stroke = .2) + | |
65 theme_light()) | |
66 saveWidget(ggplotly(g, tooltip= c("text")), file.path(gsub("\\.png", "\\.html", outplot))) | |
67 | |
61 outplot = paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""); | 68 outplot = paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""); |
62 png(outplot,width = 10, height = 10, units = 'in', res=300); | 69 png(outplot,width = 10, height = 10, units = 'in', res=300); |
63 # bitmap(outplot, "png16m"); | 70 # bitmap(outplot, "png16m"); |
64 par(mfrow=c(1,1)); | 71 par(mfrow=c(1,1)); |
65 plot(regmodel, 2, cex.lab=1.5); | 72 g <- plot(regmodel, 2, cex.lab=1.5); |
73 ggplotly(g) | |
66 dev.off(); | 74 dev.off(); |
75 | |
76 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[2]] + | |
77 geom_point(aes(text=sprintf("Standarized residual: %.2f<br>Theoretical quantile: %.2f<br>Gene: %s", .qqx, .qqy, PE_TE_data$PE_ID)), | |
78 shape = 1, size = .1) + | |
79 theme_light()) | |
80 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot))) | |
81 | |
67 | 82 |
68 outplot = paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""); | 83 outplot = paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""); |
69 png(outplot, width = 10, height = 10, units = 'in',res=300); | 84 png(outplot, width = 10, height = 10, units = 'in',res=300); |
70 # bitmap(outplot, "png16m"); | 85 # bitmap(outplot, "png16m"); |
71 par(mfrow=c(1,1)); | 86 par(mfrow=c(1,1)); |
72 plot(regmodel, 5, cex.lab=1.5); | 87 plot(regmodel, 5, cex.lab=1.5); |
73 dev.off(); | 88 dev.off(); |
74 | 89 |
90 cd_cont_pos <- function(leverage, level, model) {sqrt(level*length(coef(model))*(1-leverage)/leverage)} | |
91 cd_cont_neg <- function(leverage, level, model) {-cd_cont_pos(leverage, level, model)} | |
92 | |
93 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[4]] + | |
94 aes(label = PE_TE_data$PE_ID) + | |
95 geom_point(aes(text=sprintf("Leverage: %.2f<br>Standardized residual: %.2f<br>Gene: %s", .hat, .stdresid, PE_TE_data$PE_ID))) + | |
96 theme_light()) | |
97 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot))) | |
98 | |
75 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE); | 99 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE); |
76 | 100 |
77 cat( | 101 cat( |
78 '<tr bgcolor="#7a0019"><th>', "<font color='#ffcc33'><h4>1) <u>Residuals vs Fitted plot</h4></font></u></th>\n", | 102 '<tr bgcolor="#7a0019"><th>', "<font color='#ffcc33'><h4>1) <u>Residuals vs Fitted plot</h4></font></u></th>\n", |
79 '<th><font color=#ffcc33><h4>2) <u>Normal Q-Q plot of residuals</h4></font></u></th></tr>\n', | 103 '<th><font color=#ffcc33><h4>2) <u>Normal Q-Q plot of residuals</h4></font></u></th></tr>\n', |
80 file = htmloutfile, append = TRUE); | 104 file = htmloutfile, append = TRUE); |
81 | 105 |
82 cat( | 106 cat( |
83 '<tr><td align=center><img src="PE_TE_lm_1.png" width=600 height=600></td><td align=center><img src="PE_TE_lm_2.png" width=600 height=600></td></tr>\n', | 107 '<tr><td align=center><img src="PE_TE_lm_1.png" width=600 height=600>', |
84 file = htmloutfile, append = TRUE); | 108 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$widget_div), |
109 '</td><td align=center><img src="PE_TE_lm_2.png" width=600 height=600>', | |
110 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$widget_div), | |
111 '</td></tr>\n', file = htmloutfile, append = TRUE); | |
85 | 112 |
86 cat( | 113 cat( |
87 '<tr><td align=center>This plot checks for linear relationship assumptions.<br>If a horizontal line is observed without any distinct patterns, it indicates a linear relationship.</td>\n', | 114 '<tr><td align=center>This plot checks for linear relationship assumptions.<br>If a horizontal line is observed without any distinct patterns, it indicates a linear relationship.</td>\n', |
88 '<td align=center>This plot checks whether residuals are normally distributed or not.<br>It is good if the residuals points follow the straight dashed line i.e., do not deviate much from dashed line.</td></tr></table>\n', | 115 '<td align=center>This plot checks whether residuals are normally distributed or not.<br>It is good if the residuals points follow the straight dashed line i.e., do not deviate much from dashed line.</td></tr></table>\n', |
89 file = htmloutfile, append = TRUE); | 116 file = htmloutfile, append = TRUE); |
90 | 117 |
91 | 118 |
92 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | 119 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
93 # Residuals data | 120 # Residuals data |
94 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | 121 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
95 res_all = regmodel$residuals; | 122 res_all = regmodel$residuals; |
96 res_mean = mean(res_all); | 123 res_mean = mean(res_all); |
97 res_sd = sd(res_all); | 124 res_sd = sd(res_all); |
98 res_diff = (res_all-res_mean); | 125 res_diff = (res_all-res_mean); |
99 res_zscore = res_diff/res_sd; | 126 res_zscore = res_diff/res_sd; |
100 # res_outliers = res_all[which((res_zscore > 2)|(res_zscore < -2))] | 127 # res_outliers = res_all[which((res_zscore > 2)|(res_zscore < -2))] |
101 | 128 |
102 | 129 |
103 tempind = which((res_zscore > 2)|(res_zscore < -2)); | 130 tempind = which((res_zscore > 2)|(res_zscore < -2)); |
104 res_PE_TE_data_no_outlier = PE_TE_data[-tempind,]; | 131 res_PE_TE_data_no_outlier = PE_TE_data[-tempind,]; |
105 res_PE_TE_data_no_outlier$residuals = res_all[-tempind]; | 132 res_PE_TE_data_no_outlier$residuals = res_all[-tempind]; |
106 res_PE_TE_data_outlier = PE_TE_data[tempind,]; | 133 res_PE_TE_data_outlier = PE_TE_data[tempind,]; |
107 res_PE_TE_data_outlier$residuals = res_all[tempind]; | 134 res_PE_TE_data_outlier$residuals = res_all[tempind]; |
108 | 135 |
109 # Save the complete table for download (influential_observations) | 136 # Save the complete table for download (influential_observations) |
110 temp_outlier_data = data.frame(res_PE_TE_data_outlier$PE_ID, res_PE_TE_data_outlier$TE_abundance, res_PE_TE_data_outlier$PE_abundance, res_PE_TE_data_outlier$residuals) | 137 temp_outlier_data = data.frame(res_PE_TE_data_outlier$PE_ID, res_PE_TE_data_outlier$TE_abundance, res_PE_TE_data_outlier$PE_abundance, res_PE_TE_data_outlier$residuals) |
111 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value") | 138 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value") |
112 outdatafile = paste(outdir,"/PE_TE_outliers_residuals.txt", sep="", collapse=""); | 139 outdatafile = paste(outdir,"/PE_TE_outliers_residuals.txt", sep="", collapse=""); |
113 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); | 140 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); |
114 | 141 |
115 | 142 |
116 # Save the complete table for download (non influential_observations) | 143 # Save the complete table for download (non influential_observations) |
117 temp_all_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, res_all) | 144 temp_all_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, res_all) |
118 colnames(temp_all_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value") | 145 colnames(temp_all_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value") |
119 outdatafile = paste(outdir,"/PE_TE_abundance_residuals.txt", sep="", collapse=""); | 146 outdatafile = paste(outdir,"/PE_TE_abundance_residuals.txt", sep="", collapse=""); |
120 write.table(temp_all_data, file=outdatafile, row.names=F, sep="\t", quote=F); | 147 write.table(temp_all_data, file=outdatafile, row.names=F, sep="\t", quote=F); |
121 | 148 |
122 | 149 |
123 cat('<br><h2 id="inf_obs"><font color=#ff0000>Outliers based on the residuals from regression analysis</font></h2>\n', | 150 cat('<br><h2 id="inf_obs"><font color=#ff0000>Outliers based on the residuals from regression analysis</font></h2>\n', |
124 file = htmloutfile, append = TRUE); | 151 file = htmloutfile, append = TRUE); |
125 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n', | 152 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n', |
126 '<tr bgcolor="#7a0019"><th colspan=2><font color=#ffcc33>Residuals from Regression</font></th></tr>\n', | 153 '<tr bgcolor="#7a0019"><th colspan=2><font color=#ffcc33>Residuals from Regression</font></th></tr>\n', |
127 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n', | 154 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n', |
128 file = htmloutfile, append = TRUE); | 155 file = htmloutfile, append = TRUE); |
129 | 156 |
130 cat("<tr><td>Mean Residual value</td><td>",res_mean,"</td></tr>\n", | 157 cat("<tr><td>Mean Residual value</td><td>",res_mean,"</td></tr>\n", |
131 "<tr><td>Standard deviation (Residuals)</td><td>",res_sd,"</td></tr>\n", | 158 "<tr><td>Standard deviation (Residuals)</td><td>",res_sd,"</td></tr>\n", |
132 '<tr><td>Total outliers (Residual value > 2 standard deviation from the mean)</td><td>',length(tempind),' <font size=4>(<b><a href=PE_TE_outliers_residuals.txt target="_blank">Download these ',length(tempind),' data points with high residual values here</a></b>)</font></td>\n', | 159 '<tr><td>Total outliers (Residual value > 2 standard deviation from the mean)</td><td>',length(tempind),' <font size=4>(<b><a href="PE_TE_outliers_residuals.txt" target="_blank">Download these ',length(tempind),' data points with high residual values here</a></b>)</font></td>\n', |
133 '<tr><td colspan=2 align=center><font size=4>(<b><a href=PE_TE_abundance_residuals.txt target="_blank">Download the complete residuals data here</a></b>)</font></td></td>\n', | 160 '<tr><td colspan=2 align=center>', |
134 "</table><br><br>\n", | 161 '<font size=4>(<b><a href="PE_TE_abundance_residuals.txt" target="_blank">Download the complete residuals data here</a></b>)</font>', |
135 file = htmloutfile, append = TRUE); | 162 "</td></tr>\n</table><br><br>\n", |
136 | 163 file = htmloutfile, append = TRUE); |
164 | |
137 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | 165 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
138 | 166 |
139 | 167 |
140 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE); | 168 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE); |
141 | 169 |
142 cat( | 170 cat( |
143 '<tr bgcolor="#7a0019"><th><font color=#ffcc33><h4>3) <u>Residuals vs Leverage plot</h4></font></u></th></tr>\n', | 171 '<tr bgcolor="#7a0019"><th><font color=#ffcc33><h4>3) <u>Residuals vs Leverage plot</h4></font></u></th></tr>\n', |
144 file = htmloutfile, append = TRUE); | 172 file = htmloutfile, append = TRUE); |
145 | 173 |
146 cat( | 174 cat( |
147 '<tr><td align=center><img src="PE_TE_lm_5.png" width=600 height=600></td></tr>\n', | 175 '<tr><td align=center><img src="PE_TE_lm_5.png" width=600 height=600>', |
176 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$widget_div) | |
177 , '</td></tr>\n', | |
148 file = htmloutfile, append = TRUE); | 178 file = htmloutfile, append = TRUE); |
149 | 179 |
150 cat( | 180 cat( |
151 '<tr><td align=center>This plot is useful to identify any influential cases, that is outliers or extreme values.<br>They might influence the regression results upon inclusion or exclusion from the analysis.</td></tr></table><br>\n', | 181 '<tr><td align=center>This plot is useful to identify any influential cases, that is outliers or extreme values.<br>They might influence the regression results upon inclusion or exclusion from the analysis.</td></tr></table><br>\n', |
152 file = htmloutfile, append = TRUE); | 182 file = htmloutfile, append = TRUE); |
155 | 185 |
156 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | 186 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
157 # Cook's Distance | 187 # Cook's Distance |
158 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | 188 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
159 cat('<hr/><h2 id="inf_obs"><font color=#ff0000>INFLUENTIAL OBSERVATIONS</font></h2>\n', | 189 cat('<hr/><h2 id="inf_obs"><font color=#ff0000>INFLUENTIAL OBSERVATIONS</font></h2>\n', |
160 file = htmloutfile, append = TRUE); | 190 file = htmloutfile, append = TRUE); |
161 cat( | 191 cat( |
162 '<p><b>Cook\'s distance</b> computes the influence of each data point/observation on the predicted outcome. i.e. this measures how much the observation is influencing the fitted values.<br>In general use, those observations that have a <b>Cook\'s distance > than ', cookdist_upper_cutoff,' times the mean</b> may be classified as <b>influential.</b></p>\n', | 192 '<p><b>Cook\'s distance</b> computes the influence of each data point/observation on the predicted outcome. i.e. this measures how much the observation is influencing the fitted values.<br>In general use, those observations that have a <b>Cook\'s distance > than ', cookdist_upper_cutoff,' times the mean</b> may be classified as <b>influential.</b></p>\n', |
163 file = htmloutfile, append = TRUE); | 193 file = htmloutfile, append = TRUE); |
164 | 194 |
165 cooksd <- cooks.distance(regmodel); | 195 cooksd <- cooks.distance(regmodel); |
175 points(sel_nonoutlier, cooksd[sel_nonoutlier],pch="*", cex=2, cex.lab=1.5, col="black") | 205 points(sel_nonoutlier, cooksd[sel_nonoutlier],pch="*", cex=2, cex.lab=1.5, col="black") |
176 abline(h = as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T), col="red") # add cutoff line | 206 abline(h = as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T), col="red") # add cutoff line |
177 #text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T),names(cooksd),""), col="red", pos=2) # add labels | 207 #text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T),names(cooksd),""), col="red", pos=2) # add labels |
178 dev.off(); | 208 dev.off(); |
179 | 209 |
210 cooksd_df <- data.frame(cooksd) | |
211 cooksd_df$genes <- row.names(cooksd_df) | |
212 cooksd_df$index <- 1:nrow(cooksd_df) | |
213 cooksd_df$colors <- "black" | |
214 cutoff <- as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T) | |
215 cooksd_df[cooksd_df$cooksd > cutoff,]$colors <- "red" | |
216 | |
217 g <- ggplot(cooksd_df, aes(x = index, y = cooksd, label = row.names(cooksd_df), color=as.factor(colors), | |
218 text=sprintf("Gene: %s<br>Cook's Distance: %.3f", row.names(cooksd_df), cooksd))) + | |
219 ggtitle("Influential Obs. by Cook's distance") + xlab("Observations") + ylab("Cook's Distance") + | |
220 #xlim(0, 3000) + ylim(0, .15) + | |
221 scale_shape_discrete(solid=F) + | |
222 geom_point(size = 2, shape = 8) + | |
223 geom_hline(yintercept = cutoff, | |
224 linetype = "dashed", color = "red") + | |
225 scale_color_manual(values = c("black" = "black", "red" = "red")) + | |
226 theme_light() + theme(legend.position="none") | |
227 saveWidget(ggplotly(g, tooltip= "text"), file.path(gsub("\\.png", "\\.html", outplot))) | |
228 | |
180 cat( | 229 cat( |
181 '<img src="PE_TE_lm_cooksd.png" width=800 height=800>', | 230 '<img src="PE_TE_lm_cooksd.png" width=800 height=800>', |
231 gsub("width:500px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div), | |
182 '<br>In the above plot, observations above red line (',cookdist_upper_cutoff,' * mean Cook\'s distance) are influential. Genes that are outliers could be important. These observations influences the correlation values and regression coefficients<br><br>', | 232 '<br>In the above plot, observations above red line (',cookdist_upper_cutoff,' * mean Cook\'s distance) are influential. Genes that are outliers could be important. These observations influences the correlation values and regression coefficients<br><br>', |
183 file = htmloutfile, append = TRUE); | 233 file = htmloutfile, append = TRUE); |
184 | 234 |
185 tempind = which(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T)); | 235 tempind = which(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T)); |
186 PE_TE_data_no_outlier = PE_TE_data[-tempind,]; | 236 PE_TE_data_no_outlier = PE_TE_data[-tempind,]; |
187 PE_TE_data_no_outlier$cooksd = cooksd[-tempind]; | 237 PE_TE_data_no_outlier$cooksd = cooksd[-tempind]; |
188 PE_TE_data_outlier = PE_TE_data[tempind,]; | 238 PE_TE_data_outlier = PE_TE_data[tempind,]; |
189 PE_TE_data_outlier$cooksd = cooksd[tempind]; | 239 PE_TE_data_outlier$cooksd = cooksd[tempind]; |
193 cat( | 243 cat( |
194 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n', | 244 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n', |
195 file = htmloutfile, append = TRUE); | 245 file = htmloutfile, append = TRUE); |
196 | 246 |
197 # Save the complete table for download (influential_observations) | 247 # Save the complete table for download (influential_observations) |
198 temp_outlier_data = data.frame(PE_TE_data_outlier$PE_ID, PE_TE_data_outlier$TE_abundance, PE_TE_data_outlier$PE_abundance, PE_TE_data_outlier$cooksd) | 248 temp_outlier_data = data.frame(PE_TE_data_outlier$PE_ID, PE_TE_data_outlier$TE_abundance, PE_TE_data_outlier$PE_abundance, PE_TE_data_outlier$cooksd) |
199 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance") | 249 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance") |
200 outdatafile = paste(outdir,"/PE_TE_influential_observation.txt", sep="", collapse=""); | 250 outdatafile = paste(outdir,"/PE_TE_influential_observation.txt", sep="", collapse=""); |
201 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); | 251 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); |
202 | 252 |
203 | 253 |
204 # Save the complete table for download (non influential_observations) | 254 # Save the complete table for download (non influential_observations) |
205 temp_no_outlier_data = data.frame(PE_TE_data_no_outlier$PE_ID, PE_TE_data_no_outlier$TE_abundance, PE_TE_data_no_outlier$PE_abundance, PE_TE_data_no_outlier$cooksd) | 255 temp_no_outlier_data = data.frame(PE_TE_data_no_outlier$PE_ID, PE_TE_data_no_outlier$TE_abundance, PE_TE_data_no_outlier$PE_abundance, PE_TE_data_no_outlier$cooksd) |
206 colnames(temp_no_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance") | 256 colnames(temp_no_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance") |
207 outdatafile = paste(outdir,"/PE_TE_non_influential_observation.txt", sep="", collapse=""); | 257 outdatafile = paste(outdir,"/PE_TE_non_influential_observation.txt", sep="", collapse=""); |
208 write.table(temp_no_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); | 258 write.table(temp_no_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); |
209 | 259 |
210 | 260 |
211 cat("<tr><td>Mean Cook\'s distance</td><td>",mean(cooksd, na.rm=T),"</td></tr>\n", | 261 cat("<tr><td>Mean Cook\'s distance</td><td>",mean(cooksd, na.rm=T),"</td></tr>\n", |
212 "<tr><td>Total influential observations (Cook\'s distance > ",cookdist_upper_cutoff," * mean Cook\'s distance)</td><td>",length(tempind),"</td>\n", | 262 "<tr><td>Total influential observations (Cook\'s distance > ",cookdist_upper_cutoff," * mean Cook\'s distance)</td><td>",length(tempind),"</td>\n", |
213 | 263 |
214 "<tr><td>Observations with Cook\'s distance < ",cookdist_upper_cutoff," * mean Cook\'s distance</td><td>",length(which(cooksd<as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T))),"</td>\n", | 264 "<tr><td>Observations with Cook\'s distance < ",cookdist_upper_cutoff," * mean Cook\'s distance</td><td>",length(which(cooksd<as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T))),"</td>\n", |
215 "</table><br><br>\n", | 265 "</table><br><br>\n", |
216 file = htmloutfile, append = TRUE); | 266 file = htmloutfile, append = TRUE); |
217 | 267 |
218 | 268 |
219 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | 269 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
220 # Scatter plot after removal of influential points | 270 # Scatter plot after removal of influential points |
221 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | 271 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
222 outplot = paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""); | 272 outplot = paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""); |
223 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); | 273 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); |
224 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); | 274 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); |
225 png(outplot, width = 10, height = 10, units = 'in', res=300); | 275 png(outplot, width = 10, height = 10, units = 'in', res=300); |
226 # bitmap(outplot,"png16m"); | 276 # bitmap(outplot,"png16m"); |
227 g = ggplot(PE_TE_data_no_outlier, aes(x=TE_abundance, y=PE_abundance))+geom_point() + geom_smooth() + xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + xlim(min_lim,max_lim) + ylim(min_lim,max_lim); | 277 suppressWarnings(g <- ggplot(PE_TE_data_no_outlier, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() + |
228 suppressMessages(plot(g)); | 278 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + |
229 dev.off(); | 279 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) + |
230 | 280 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f", |
231 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatterplot: Before removal</font></th><th><font color=#ffcc33>Scatterplot: After removal</font></th></tr>\n', file = htmloutfile, append = TRUE); | 281 PE_ID, TE_abundance, PE_abundance)))) |
232 # Before | 282 suppressMessages(plot(g)) |
233 cat("<tr><td align=center><!--<font color='#ff0000'><h3>Scatter plot between Proteome and Transcriptome Abundance</h3></font>\n-->", '<img src="TE_PE_scatter.png" width=600 height=600></td>\n', file = htmloutfile, append = TRUE); | 283 suppressMessages(saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot)))) |
234 | 284 dev.off(); |
235 # After | 285 |
236 cat("<td align=center>\n", | 286 |
237 '<img src="AbundancePlot_scatter_without_outliers.png" width=600 height=600></td></tr>\n', | 287 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatterplot: Before removal</font></th><th><font color=#ffcc33>Scatterplot: After removal</font></th></tr>\n', file = htmloutfile, append = TRUE); |
238 file = htmloutfile, append = TRUE); | 288 # Before |
239 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | 289 cat("<tr><td align=center><!--<font color='#ff0000'><h3>Scatter plot between Proteome and Transcriptome Abundance</h3></font>\n-->", |
240 | 290 '<img src="TE_PE_scatter.png" width=600 height=600>', |
291 gsub('id="html', 'id="secondhtml"', | |
292 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/TE_PE_scatter.png",sep="",collapse=""))$widget_div)), | |
293 '</td>\n', | |
294 file = htmloutfile, append = TRUE); | |
295 | |
296 # After | |
297 cat("<td align=center>\n", | |
298 '<img src="AbundancePlot_scatter_without_outliers.png" width=600 height=600>', | |
299 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(outplot)$widget_div), | |
300 | |
301 '</td></tr>\n', | |
302 file = htmloutfile, append = TRUE); | |
303 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | |
304 | |
241 | 305 |
242 cor_result_pearson = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "pearson"); | 306 cor_result_pearson = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "pearson"); |
243 cor_result_spearman = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "spearman"); | 307 cor_result_spearman = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "spearman"); |
244 cor_result_kendall = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "kendall"); | 308 cor_result_kendall = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "kendall"); |
245 | 309 |
247 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); | 311 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); |
248 cat('</td>\n', file = htmloutfile, append=TRUE); | 312 cat('</td>\n', file = htmloutfile, append=TRUE); |
249 | 313 |
250 | 314 |
251 cat('<td><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Method 1</font></th><th><font color=#ffcc33>Method 2</font></th><th><font color=#ffcc33>Method 3</font></th></tr>\n', | 315 cat('<td><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Method 1</font></th><th><font color=#ffcc33>Method 2</font></th><th><font color=#ffcc33>Method 3</font></th></tr>\n', |
252 file = htmloutfile, append = TRUE); | 316 file = htmloutfile, append = TRUE); |
253 | 317 |
254 cat( | 318 cat( |
255 "<tr><td>Correlation method</td><td>",cor_result_pearson$method,"</td><td>",cor_result_spearman$method,"</td><td>",cor_result_kendall$method,"</td></tr>\n", | 319 "<tr><td>Correlation method</td><td>",cor_result_pearson$method,"</td><td>",cor_result_spearman$method,"</td><td>",cor_result_kendall$method,"</td></tr>\n", |
256 "<tr><td>Correlation coefficient</td><td>",cor_result_pearson$estimate,"</td><td>",cor_result_spearman$estimate,"</td><td>",cor_result_kendall$estimate,"</td></tr>\n", | 320 "<tr><td>Correlation coefficient</td><td>",cor_result_pearson$estimate,"</td><td>",cor_result_spearman$estimate,"</td><td>",cor_result_kendall$estimate,"</td></tr>\n", |
257 file = htmloutfile, append = TRUE) | 321 file = htmloutfile, append = TRUE) |
265 }else{ | 329 }else{ |
266 tab_n_row = 10; | 330 tab_n_row = 10; |
267 } | 331 } |
268 | 332 |
269 cat("<br><br><font size=5><b><a href='PE_TE_influential_observation.txt' target='_blank'>Download the complete list of influential observations</a></b></font> ", | 333 cat("<br><br><font size=5><b><a href='PE_TE_influential_observation.txt' target='_blank'>Download the complete list of influential observations</a></b></font> ", |
270 "<font size=5><b><a href='PE_TE_non_influential_observation.txt' target='_blank'>Download the complete list (After removing influential points)</a></b></font><br>\n", | 334 "<font size=5><b><a href='PE_TE_non_influential_observation.txt' target='_blank'>Download the complete list (After removing influential points)</a></b></font><br>\n", |
271 '<br><font color="brown"><h4>Top ',as.character(tab_n_row),' Influential observations (Cook\'s distance > ',cookdist_upper_cutoff,' * mean Cook\'s distance)</h4></font>\n', | 335 '<br><font color="brown"><h4>Top ',as.character(tab_n_row),' Influential observations (Cook\'s distance > ',cookdist_upper_cutoff,' * mean Cook\'s distance)</h4></font>\n', |
272 file = htmloutfile, append = TRUE); | 336 file = htmloutfile, append = TRUE); |
273 | 337 |
274 cat('<table border=1 cellspacing=0 cellpadding=5> <tr bgcolor="#7a0019">\n', sep = "",file = htmloutfile, append = TRUE); | 338 cat('<table border=1 cellspacing=0 cellpadding=5> <tr bgcolor="#7a0019">\n', sep = "",file = htmloutfile, append = TRUE); |
275 cat("<th><font color=#ffcc33>Gene</font></th><th><font color=#ffcc33>Protein Log Fold-Change</font></th><th><font color=#ffcc33>Transcript Log Fold-Change</font></th><th><font color=#ffcc33>Cook's Distance</font></th></tr>\n", | 339 cat("<th><font color=#ffcc33>Gene</font></th><th><font color=#ffcc33>Protein Log Fold-Change</font></th><th><font color=#ffcc33>Transcript Log Fold-Change</font></th><th><font color=#ffcc33>Cook's Distance</font></th></tr>\n", |
276 file = htmloutfile, append = TRUE); | 340 file = htmloutfile, append = TRUE); |
277 | 341 |
278 | 342 |
279 for(i in 1:tab_n_row) | 343 for(i in 1:tab_n_row) |
280 { | 344 { |
281 cat( | 345 cat( |
283 '<td>',format(PE_TE_data_outlier_sorted[i,2], scientific=F),'</td>\n', | 347 '<td>',format(PE_TE_data_outlier_sorted[i,2], scientific=F),'</td>\n', |
284 '<td>',PE_TE_data_outlier_sorted[i,4],'</td>\n', | 348 '<td>',PE_TE_data_outlier_sorted[i,4],'</td>\n', |
285 '<td>',format(PE_TE_data_outlier_sorted[i,5], scientific=F),'</td></tr>\n', | 349 '<td>',format(PE_TE_data_outlier_sorted[i,5], scientific=F),'</td></tr>\n', |
286 file = htmloutfile, append = TRUE); | 350 file = htmloutfile, append = TRUE); |
287 } | 351 } |
288 cat('</table><br><br>\n',file = htmloutfile, append = TRUE); | 352 cat('</table><br><br>\n',file = htmloutfile, append = TRUE); |
289 | 353 |
290 | 354 |
291 } | 355 } |
292 | 356 |
293 | 357 |
294 | 358 |
295 #=============================================================================== | 359 #=============================================================================== |
296 # Heatmap | 360 # Heatmap |
297 #=============================================================================== | 361 #=============================================================================== |
298 singlesample_heatmap=function(PE_TE_data, htmloutfile, hm_nclust){ | 362 singlesample_heatmap=function(PE_TE_data, htmloutfile, hm_nclust){ |
299 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Heatmap of PE and TE abundance values (Hierarchical clustering)</font></th><th><font color=#ffcc33>Number of clusters to extract: ',hm_nclust,'</font></th></tr>\n', | 363 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Heatmap of PE and TE abundance values (Hierarchical clustering)</font></th><th><font color=#ffcc33>Number of clusters to extract: ',hm_nclust,'</font></th></tr>\n', |
300 file = htmloutfile, append = TRUE); | 364 file = htmloutfile, append = TRUE); |
301 | 365 |
302 hc=hclust(dist(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]))) | 366 hc=hclust(dist(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]))) |
303 hm_cluster = cutree(hc,k=hm_nclust); | 367 hm_cluster = cutree(hc,k=hm_nclust); |
304 | 368 |
305 outplot = paste(outdir,"/PE_TE_heatmap.png",sep="",collapse=""); | 369 outplot = paste(outdir,"/PE_TE_heatmap.png",sep="",collapse=""); |
306 png(outplot, width = 10, height = 10, units = 'in', res=300); | 370 png(outplot, width = 10, height = 10, units = 'in', res=300); |
307 # bitmap(outplot, "png16m"); | 371 # bitmap(outplot, "png16m"); |
308 par(mfrow=c(1,1)); | 372 par(mfrow=c(1,1)); |
309 hmap = heatmap.2(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]), trace="none", cexCol=1, col=greenred(100),Colv=F, labCol=c("Proteins","Transcripts"), scale="col", hclustfun = hclust, distfun = dist); | 373 hmap = heatmap.2(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]), |
374 trace="none", cexCol=1, col=greenred(100),Colv=F, | |
375 labCol=c("Proteins","Transcripts"), scale="col", | |
376 hclustfun = hclust, distfun = dist); | |
377 | |
310 dev.off(); | 378 dev.off(); |
311 | 379 |
312 cat('<tr><td align=center colspan="2"><img src="PE_TE_heatmap.png" width=800 height=800></td></tr>\n', | 380 |
313 file = htmloutfile, append = TRUE); | 381 p <- d3heatmap(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]), scale = "col", |
382 dendrogram = "row", colors = greenred(100), | |
383 hclustfun = hclust, distfun = dist, | |
384 show_grid = FALSE) | |
385 saveWidget(p, file.path(gsub("\\.png", "\\.html", outplot))) | |
386 | |
387 cat('<tr><td align=center colspan="2">', | |
388 '<img src="PE_TE_heatmap.png" width=800 height=800>', | |
389 gsub("width:960px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div), | |
390 '</td></tr>\n', | |
391 file = htmloutfile, append = TRUE); | |
314 | 392 |
315 | 393 |
316 temp_PE_TE_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, hm_cluster); | 394 temp_PE_TE_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, hm_cluster); |
317 colnames(temp_PE_TE_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cluster (Hierarchical clustering)") | 395 colnames(temp_PE_TE_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cluster (Hierarchical clustering)") |
318 tempoutfile = paste(outdir,"/PE_TE_hc_clusterpoints.txt",sep="",collapse=""); | 396 tempoutfile = paste(outdir,"/PE_TE_hc_clusterpoints.txt",sep="",collapse=""); |
319 write.table(temp_PE_TE_data, file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n") | 397 write.table(temp_PE_TE_data, file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n") |
320 | 398 |
321 | 399 |
322 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_hc_clusterpoints.txt" target="_blank"><b>Download the hierarchical cluster list</b></a></font></td></tr></table>\n', | 400 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_hc_clusterpoints.txt" target="_blank"><b>Download the hierarchical cluster list</b></a></font></td></tr></table>\n', |
323 file = htmloutfile, append = TRUE); | 401 file = htmloutfile, append = TRUE); |
324 } | 402 } |
325 | 403 |
326 | 404 |
327 #=============================================================================== | 405 #=============================================================================== |
328 # K-means clustering | 406 # K-means clustering |
335 # bitmap(outplot, "png16m"); | 413 # bitmap(outplot, "png16m"); |
336 par(mfrow=c(1,1)); | 414 par(mfrow=c(1,1)); |
337 scatter.smooth(PE_TE_data_kdata[,"TE_abundance"], PE_TE_data_kdata[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5); | 415 scatter.smooth(PE_TE_data_kdata[,"TE_abundance"], PE_TE_data_kdata[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5); |
338 legend(1, 95, legend=c("Cluster 1", "Line 2"), col="red", lty=1:1, cex=0.8) | 416 legend(1, 95, legend=c("Cluster 1", "Line 2"), col="red", lty=1:1, cex=0.8) |
339 legend(1, 95, legend="Cluster 2", col="green", lty=1:1, cex=0.8) | 417 legend(1, 95, legend="Cluster 2", col="green", lty=1:1, cex=0.8) |
340 | |
341 | 418 |
342 ind=which(k1$cluster==1); | 419 ind=which(k1$cluster==1); |
343 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="red", pch=16); | 420 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="red", pch=16); |
344 ind=which(k1$cluster==2); | 421 ind=which(k1$cluster==2); |
345 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="green", pch=16); | 422 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="green", pch=16); |
359 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="yellow", pch=16); | 436 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="yellow", pch=16); |
360 ind=which(k1$cluster==10); | 437 ind=which(k1$cluster==10); |
361 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="orange", pch=16); | 438 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="orange", pch=16); |
362 dev.off(); | 439 dev.off(); |
363 | 440 |
441 # Interactive plot for k-means clustering | |
442 g <- ggplot(PE_TE_data, aes(x = TE_abundance, y = PE_abundance, label = row.names(PE_TE_data), | |
443 text=sprintf("Gene: %s<br>Transcript Abundance: %.3f<br>Protein Abundance: %.3f", | |
444 PE_ID, TE_abundance, PE_abundance), | |
445 color=as.factor(k1$cluster))) + | |
446 xlab("Transcript Abundance") + ylab("Protein Abundance") + | |
447 scale_shape_discrete(solid=F) + geom_smooth(method = "loess", span = 2/3) + | |
448 geom_point(size = 1, shape = 8) + | |
449 theme_light() + theme(legend.position="none") | |
450 saveWidget(ggplotly(g, tooltip=c("text")), file.path(gsub("\\.png", "\\.html", outplot))) | |
451 | |
364 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>K-mean clustering</font></th><th><font color=#ffcc33>Number of clusters: ',nclust,'</font></th></tr>\n', | 452 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>K-mean clustering</font></th><th><font color=#ffcc33>Number of clusters: ',nclust,'</font></th></tr>\n', |
365 file = htmloutfile, append = TRUE); | 453 file = htmloutfile, append = TRUE); |
366 | 454 |
367 tempind = order(k1$cluster); | 455 tempind = order(k1$cluster); |
368 tempoutfile = paste(outdir,"/PE_TE_kmeans_clusterpoints.txt",sep="",collapse=""); | 456 tempoutfile = paste(outdir,"/PE_TE_kmeans_clusterpoints.txt",sep="",collapse=""); |
369 write.table(data.frame(PE_TE_data_kdata[tempind, ], Cluster=k1$cluster[tempind]), file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n") | 457 write.table(data.frame(PE_TE_data_kdata[tempind, ], Cluster=k1$cluster[tempind]), file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n") |
370 | 458 |
371 | 459 #paste(outdir,"/PE_TE_heatmap.png",sep="",collapse=""); |
372 cat('<tr><td colspan="2" align=center><img src="PE_TE_kmeans.png" width=800 height=800></td></tr>\n', | 460 cat('<tr><td colspan="2" align=center><img src="PE_TE_kmeans.png" width=800 height=800>', |
373 file = htmloutfile, append = TRUE); | 461 gsub("width:500px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div), '</td></tr>\n', |
462 file = htmloutfile, append = TRUE); | |
374 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_kmeans_clusterpoints.txt" target="_blank"><b>Download the cluster list</b></a></font></td></tr></table><br><hr/>\n', | 463 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_kmeans_clusterpoints.txt" target="_blank"><b>Download the cluster list</b></a></font></td></tr></table><br><hr/>\n', |
375 file = htmloutfile, append = TRUE); | 464 file = htmloutfile, append = TRUE); |
376 | 465 |
377 } | 466 } |
378 | 467 |
379 #=============================================================================== | 468 #=============================================================================== |
380 # scatter plot | 469 # scatter plot |
383 { | 472 { |
384 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); | 473 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); |
385 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); | 474 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); |
386 png(outfile, width = 10, height = 10, units = 'in', res=300); | 475 png(outfile, width = 10, height = 10, units = 'in', res=300); |
387 # bitmap(outfile, "png16m"); | 476 # bitmap(outfile, "png16m"); |
388 g = ggplot(PE_TE_data, aes(x=TE_abundance, y=PE_abundance))+geom_point() + geom_smooth() + xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + xlim(min_lim,max_lim) + ylim(min_lim,max_lim); | 477 suppressWarnings(g <- ggplot(PE_TE_data, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() + |
389 suppressMessages(plot(g)); | 478 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + |
390 # plot(g); | 479 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) + |
480 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f", | |
481 PE_ID, TE_abundance, PE_abundance)), | |
482 size = .5)) | |
483 suppressMessages(plot(g)) | |
484 suppressMessages(saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outfile)))) | |
391 dev.off(); | 485 dev.off(); |
392 } | 486 } |
393 | 487 |
394 #=============================================================================== | 488 #=============================================================================== |
395 # Correlation table | 489 # Correlation table |
420 tempdf = df[,-1, drop=FALSE]; | 514 tempdf = df[,-1, drop=FALSE]; |
421 tempdf = t(tempdf) %>% as.data.frame(); | 515 tempdf = t(tempdf) %>% as.data.frame(); |
422 tempdf[is.na(tempdf)] = 0; | 516 tempdf[is.na(tempdf)] = 0; |
423 tempdf$Sample = rownames(tempdf); | 517 tempdf$Sample = rownames(tempdf); |
424 tempdf1 = melt(tempdf, id.vars = "Sample"); | 518 tempdf1 = melt(tempdf, id.vars = "Sample"); |
519 | |
520 if("Gene" %in% colnames(df)){ | |
521 tempdf1$Name = df$Gene; | |
522 } else if ("Protein" %in% colnames(df)){ | |
523 tempdf1$Name = df$Protein; | |
524 } else if ("Genes" %in% colnames(df)){ | |
525 tempdf1$Name = df$Genes; | |
526 } | |
527 | |
425 tempdf1$Group = sampleinfo_df[tempdf1$Sample,2]; | 528 tempdf1$Group = sampleinfo_df[tempdf1$Sample,2]; |
426 png(outplot, width = 6, height = 6, units = 'in', res=300); | 529 png(outplot, width = 6, height = 6, units = 'in', res=300); |
427 # bitmap(outplot, "png16m"); | 530 # bitmap(outplot, "png16m"); |
428 if(fill_leg=="Yes") | 531 if(fill_leg=="No"){ |
429 { | 532 tempdf1$Group = c("case", "control") |
430 g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) + geom_boxplot() + labs(x=user_xlab) + labs(y=user_ylab) | 533 } |
534 | |
535 g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) + | |
536 geom_boxplot()+ | |
537 labs(x=user_xlab) + labs(y=user_ylab) | |
538 saveWidget(ggplotly(g), file.path(gsub("\\.png", "\\.html", outfile))) | |
539 plot(g); | |
540 dev.off(); | |
541 } | |
542 | |
543 ## A wrapper to saveWidget which compensates for arguable BUG in | |
544 ## saveWidget which requires `file` to be in current working | |
545 ## directory. | |
546 saveWidget <- function (widget,file,...) { | |
547 wd<-getwd() | |
548 on.exit(setwd(wd)) | |
549 outDir<-dirname(file) | |
550 file<-basename(file) | |
551 setwd(outDir); | |
552 htmlwidgets::saveWidget(widget,file=file,selfcontained = FALSE) | |
553 } | |
554 | |
555 #=============================================================================== | |
556 # Mean or Median of Replicates | |
557 #=============================================================================== | |
558 | |
559 mergeReplicates = function(TE_df,PE_df, sampleinfo_df, method) | |
560 { | |
561 grps = unique(sampleinfo_df[,2]); | |
562 | |
563 TE_df_merged <<- sapply(grps, function(x){ | |
564 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] | |
565 if(length(tempsample)!=1){ | |
566 apply(TE_df[,tempsample],1,method); | |
567 }else{ | |
568 return(TE_df[,tempsample]); | |
569 } | |
570 }); | |
571 TE_df_merged <<- data.frame(as.character(TE_df[,1]), TE_df_merged); | |
572 colnames(TE_df_merged) = c(colnames(TE_df)[1], grps); | |
573 | |
574 PE_df_merged <<- sapply(grps, function(x){ | |
575 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] | |
576 if(length(tempsample)!=1){ | |
577 apply(PE_df[,tempsample],1,method); | |
578 }else{ | |
579 return(PE_df[,tempsample]); | |
580 } | |
581 }); | |
582 | |
583 PE_df_merged <<- data.frame(as.character(PE_df[,1]), PE_df_merged); | |
584 colnames(PE_df_merged) = c(colnames(PE_df)[1], grps); | |
585 | |
586 #sampleinfo_df_merged = data.frame(Sample = grps, Group = grps, stringsAsFactors = F); | |
587 sampleinfo_df_merged = data.frame(Sample = grps, Group = "Group", stringsAsFactors = F); | |
588 | |
589 return(list(TE_df_merged = TE_df_merged, PE_df_merged = PE_df_merged, sampleinfo_df_merged = sampleinfo_df_merged)); | |
590 } | |
591 | |
592 #=============================================================================== | |
593 # (T-Test or Wilcoxon ranksum test) and Volcano Plot | |
594 #=============================================================================== | |
595 | |
596 perform_Test_Volcano = function(TE_df_data,PE_df_data,TE_df_logfold, PE_df_logfold,sampleinfo_df, method, correction_method,volc_with) | |
597 { | |
598 | |
599 PE_colnames = colnames(PE_df_data); | |
600 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; | |
601 control_ind <<- sapply(control_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); | |
602 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; | |
603 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); | |
604 | |
605 if(method=="mean"){ | |
606 #PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); | |
607 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) | |
431 }else{ | 608 }else{ |
432 if(fill_leg=="No") | 609 if(method=="median"){ |
433 { | 610 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) |
434 tempdf1$Group = c("case", "control") | 611 # PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); |
435 g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) + geom_boxplot() + labs(x=user_xlab) + labs(y=user_ylab) | |
436 } | 612 } |
437 } | 613 } |
438 plot(g); | 614 PE_adj_pval = p.adjust(PE_pval, method = correction_method, n = length(PE_pval)) |
439 dev.off(); | 615 |
440 } | 616 |
441 | 617 TE_colnames = colnames(TE_df_data); |
442 | 618 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; |
443 #=============================================================================== | 619 control_ind <<- sapply(control_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); |
444 # Mean or Median of Replicates | 620 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; |
445 #=============================================================================== | 621 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); |
446 | 622 |
447 mergeReplicates = function(TE_df,PE_df, sampleinfo_df, method) | 623 if(method=="mean"){ |
448 { | 624 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); |
449 grps = unique(sampleinfo_df[,2]); | 625 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) |
450 | |
451 TE_df_merged <<- sapply(grps, function(x){ | |
452 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] | |
453 if(length(tempsample)!=1){ | |
454 apply(TE_df[,tempsample],1,method); | |
455 }else{ | 626 }else{ |
456 return(TE_df[,tempsample]); | 627 if(method=="median"){ |
628 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) | |
629 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); | |
630 } | |
457 } | 631 } |
458 }); | 632 TE_adj_pval = p.adjust(TE_pval, method = correction_method, n = length(TE_pval)) |
459 TE_df_merged <<- data.frame(as.character(TE_df[,1]), TE_df_merged); | 633 |
460 colnames(TE_df_merged) = c(colnames(TE_df)[1], grps); | 634 |
461 | 635 PE_TE_logfold_pval = data.frame(TE_df_logfold$Gene, TE_df_logfold$LogFold, TE_pval, TE_adj_pval, PE_df_logfold$LogFold, PE_pval, PE_adj_pval); |
462 PE_df_merged <<- sapply(grps, function(x){ | 636 colnames(PE_TE_logfold_pval) = c("Gene", "Transcript log fold-change", "p-value (transcript)", "adj p-value (transcript)", "Protein log fold-change", "p-value (protein)", "adj p-value (protein)"); |
463 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] | 637 outdatafile = paste(outdir,"/PE_TE_logfold_pval.txt", sep="", collapse=""); |
464 if(length(tempsample)!=1){ | 638 write.table(PE_TE_logfold_pval, file=outdatafile, row.names=F, sep="\t", quote=F); |
465 apply(PE_df[,tempsample],1,method); | 639 cat("<br><br><font size=5><b><a href='PE_TE_logfold_pval.txt' target='_blank'>Download the complete fold change data here</a></b></font><br>\n", |
640 file = htmloutfile, append = TRUE); | |
641 | |
642 if(length(condition_ind)!=1) | |
643 { | |
644 # Volcano Plot | |
645 | |
646 if(volc_with=="adj_pval") | |
647 { | |
648 PE_pval = PE_adj_pval | |
649 TE_pval = TE_adj_pval | |
650 volc_ylab = "-log10 Adjusted p-value"; | |
651 }else{ | |
652 if(volc_with=="pval") | |
653 { | |
654 volc_ylab = "-log10 p-value"; | |
655 } | |
656 } | |
657 outplot_PE = paste(outdir,"/PE_volcano.png",sep="",collapse=""); | |
658 png(outplot_PE, width = 10, height = 10, units = 'in', res=300); | |
659 # bitmap(outplot, "png16m"); | |
660 par(mfrow=c(1,1)); | |
661 | |
662 plot(PE_df_logfold$LogFold, -log10(PE_pval), | |
663 xlab="log2 fold change", ylab=volc_ylab, | |
664 type="n") | |
665 sel <- which((PE_df_logfold$LogFold<=log(2,base=2))&(PE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use | |
666 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="black") | |
667 PE_df_logfold$color <- "black" | |
668 #sel <- which((PE_df_logfold$LogFold>log(2,base=2))&(PE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use | |
669 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2))) | |
670 sel1 <- which(PE_pval<=0.05) | |
671 sel=intersect(sel,sel1) | |
672 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="red") | |
673 PE_df_logfold[sel,]$color <- "red" | |
674 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2))) | |
675 sel1 <- which(PE_pval>0.05) | |
676 sel=intersect(sel,sel1) | |
677 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="blue") | |
678 PE_df_logfold[sel,]$color <- "blue" | |
679 abline(h = -log(0.05,base=10), col="red", lty=2) | |
680 abline(v = log(2,base=2), col="red", lty=2) | |
681 abline(v = log(0.5,base=2), col="red", lty=2) | |
682 dev.off(); | |
683 | |
684 g <- ggplot(PE_df_logfold, aes(x = LogFold, -log10(PE_pval), color = as.factor(color), | |
685 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f", | |
686 Genes, LogFold, -log10(PE_pval), PE_pval))) + | |
687 xlab("log2 fold change") + ylab("-log10 p-value") + | |
688 geom_point(shape=1, size = 1.5, stroke = .2) + | |
689 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) + | |
690 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") + | |
691 geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") + | |
692 geom_vline(xintercept = log(0.5,base=2), linetype="dashed", color="red") + | |
693 theme_light() + theme(legend.position="none") | |
694 saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot_PE))) | |
695 | |
696 outplot_TE = paste(outdir,"/TE_volcano.png",sep="",collapse=""); | |
697 png(outplot_TE, width = 10, height = 10, units = 'in', res=300); | |
698 # bitmap(outplot, "png16m"); | |
699 par(mfrow=c(1,1)); | |
700 | |
701 plot(TE_df_logfold$LogFold, -log10(TE_pval), | |
702 xlab="log2 fold change", ylab=volc_ylab, | |
703 type="n") | |
704 | |
705 sel <- which((TE_df_logfold$LogFold<=log(2,base=2))&(TE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use | |
706 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="black") | |
707 TE_df_logfold$color <- "black" | |
708 #sel <- which((TE_df_logfold$LogFold>log(2,base=2))&(TE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use | |
709 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2))) | |
710 sel1 <- which(TE_pval<=0.05) | |
711 sel=intersect(sel,sel1) | |
712 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="red") | |
713 TE_df_logfold[sel,]$color <- "red" | |
714 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2))) | |
715 sel1 <- which(TE_pval>0.05) | |
716 sel=intersect(sel,sel1) | |
717 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="blue") | |
718 TE_df_logfold[sel,]$color <- "blue" | |
719 abline(h = -log(0.05,base=10), col="red", lty=2) | |
720 abline(v = log(2,base=2), col="red", lty=2) | |
721 abline(v = log(0.5,base=2), col="red", lty=2) | |
722 dev.off(); | |
723 | |
724 g <- ggplot(TE_df_logfold, aes(x = LogFold, -log10(TE_pval), color = as.factor(color), | |
725 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f", | |
726 Genes, LogFold, -log10(TE_pval), TE_pval))) + | |
727 xlab("log2 fold change") + ylab("-log10 p-value") + | |
728 geom_point(shape=1, size = 1.5, stroke = .2) + | |
729 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) + | |
730 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") + | |
731 geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") + | |
732 geom_vline(xintercept = log(0.5,base=2), linetype="dashed", color="red") + | |
733 theme_light() + theme(legend.position="none") | |
734 saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot_TE))) | |
735 | |
736 | |
737 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Transcript Fold-Change</font></th><th><font color=#ffcc33>Protein Fold-Change</font></th></tr>\n', file = htmloutfile, append = TRUE); | |
738 cat("<tr><td align=center>", | |
739 '<img src="TE_volcano.png" width=600 height=600>', | |
740 extractWidgetCode(outplot_TE)$widget_div, | |
741 '</td>\n', file = htmloutfile, append = TRUE); | |
742 cat("<td align=center>", | |
743 '<img src="PE_volcano.png" width=600 height=600>', | |
744 extractWidgetCode(outplot_PE)$widget_div, | |
745 '</td></tr></table><br>\n', | |
746 file = htmloutfile, append = TRUE); | |
747 | |
748 | |
466 }else{ | 749 }else{ |
467 return(PE_df[,tempsample]); | 750 cat('<br><br><b><font color=red>!!! No replicates found. Cannot perform test to check significance of differential expression. Thus, no Volcano plot generated !!!</font></b><br><br>', |
751 file = htmloutfile, append = TRUE); | |
468 } | 752 } |
469 }); | 753 |
470 | |
471 PE_df_merged <<- data.frame(as.character(PE_df[,1]), PE_df_merged); | |
472 colnames(PE_df_merged) = c(colnames(PE_df)[1], grps); | |
473 | |
474 #sampleinfo_df_merged = data.frame(Sample = grps, Group = grps, stringsAsFactors = F); | |
475 sampleinfo_df_merged = data.frame(Sample = grps, Group = "Group", stringsAsFactors = F); | |
476 | |
477 return(list(TE_df_merged = TE_df_merged, PE_df_merged = PE_df_merged, sampleinfo_df_merged = sampleinfo_df_merged)); | |
478 } | |
479 | |
480 #=============================================================================== | |
481 # (T-Test or Wilcoxon ranksum test) and Volcano Plot | |
482 #=============================================================================== | |
483 | |
484 perform_Test_Volcano = function(TE_df_data,PE_df_data,TE_df_logfold, PE_df_logfold,sampleinfo_df, method, correction_method,volc_with) | |
485 { | |
486 | |
487 PE_colnames = colnames(PE_df_data); | |
488 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; | |
489 control_ind <<- sapply(control_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); | |
490 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; | |
491 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); | |
492 | |
493 if(method=="mean"){ | |
494 #PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); | |
495 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) | |
496 }else{ | |
497 if(method=="median"){ | |
498 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) | |
499 # PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); | |
500 } | |
501 } | |
502 PE_adj_pval = p.adjust(PE_pval, method = correction_method, n = length(PE_pval)) | |
503 | |
504 | |
505 TE_colnames = colnames(TE_df_data); | |
506 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; | |
507 control_ind <<- sapply(control_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); | |
508 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; | |
509 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); | |
510 | |
511 if(method=="mean"){ | |
512 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); | |
513 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) | |
514 }else{ | |
515 if(method=="median"){ | |
516 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) | |
517 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); | |
518 } | |
519 } | |
520 TE_adj_pval = p.adjust(TE_pval, method = correction_method, n = length(TE_pval)) | |
521 | |
522 | |
523 PE_TE_logfold_pval = data.frame(TE_df_logfold$Gene, TE_df_logfold$LogFold, TE_pval, TE_adj_pval, PE_df_logfold$LogFold, PE_pval, PE_adj_pval); | |
524 colnames(PE_TE_logfold_pval) = c("Gene", "Transcript log fold-change", "p-value (transcript)", "adj p-value (transcript)", "Protein log fold-change", "p-value (protein)", "adj p-value (protein)"); | |
525 outdatafile = paste(outdir,"/PE_TE_logfold_pval.txt", sep="", collapse=""); | |
526 write.table(PE_TE_logfold_pval, file=outdatafile, row.names=F, sep="\t", quote=F); | |
527 cat("<br><br><font size=5><b><a href='PE_TE_logfold_pval.txt' target='_blank'>Download the complete fold change data here</a></b></font><br>\n", | |
528 file = htmloutfile, append = TRUE); | |
529 | |
530 if(length(condition_ind)!=1) | |
531 { | |
532 # Volcano Plot | |
533 | |
534 if(volc_with=="adj_pval") | |
535 { | |
536 PE_pval = PE_adj_pval | |
537 TE_pval = TE_adj_pval | |
538 volc_ylab = "-log10 Adjusted p-value"; | |
539 }else{ | |
540 if(volc_with=="pval") | |
541 { | |
542 volc_ylab = "-log10 p-value"; | |
543 } | |
544 } | |
545 outplot = paste(outdir,"/PE_volcano.png",sep="",collapse=""); | |
546 png(outplot, width = 10, height = 10, units = 'in', res=300); | |
547 # bitmap(outplot, "png16m"); | |
548 par(mfrow=c(1,1)); | |
549 | |
550 plot(PE_df_logfold$LogFold, -log10(PE_pval), | |
551 xlab="log2 fold change", ylab=volc_ylab, | |
552 type="n") | |
553 sel <- which((PE_df_logfold$LogFold<=log(2,base=2))&(PE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use | |
554 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="black") | |
555 #sel <- which((PE_df_logfold$LogFold>log(2,base=2))&(PE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use | |
556 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2))) | |
557 sel1 <- which(PE_pval<=0.05) | |
558 sel=intersect(sel,sel1) | |
559 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="red") | |
560 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2))) | |
561 sel1 <- which(PE_pval>0.05) | |
562 sel=intersect(sel,sel1) | |
563 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="blue") | |
564 abline(h = -log(0.05,base=10), col="red", lty=2) | |
565 abline(v = log(2,base=2), col="red", lty=2) | |
566 abline(v = log(0.5,base=2), col="red", lty=2) | |
567 dev.off(); | |
568 | |
569 | |
570 | |
571 outplot = paste(outdir,"/TE_volcano.png",sep="",collapse=""); | |
572 png(outplot, width = 10, height = 10, units = 'in', res=300); | |
573 # bitmap(outplot, "png16m"); | |
574 par(mfrow=c(1,1)); | |
575 | |
576 plot(TE_df_logfold$LogFold, -log10(TE_pval), | |
577 xlab="log2 fold change", ylab=volc_ylab, | |
578 type="n") | |
579 sel <- which((TE_df_logfold$LogFold<=log(2,base=2))&(TE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use | |
580 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="black") | |
581 #sel <- which((TE_df_logfold$LogFold>log(2,base=2))&(TE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use | |
582 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2))) | |
583 sel1 <- which(TE_pval<=0.05) | |
584 sel=intersect(sel,sel1) | |
585 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="red") | |
586 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2))) | |
587 sel1 <- which(TE_pval>0.05) | |
588 sel=intersect(sel,sel1) | |
589 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="blue") | |
590 abline(h = -log(0.05,base=10), col="red", lty=2) | |
591 abline(v = log(2,base=2), col="red", lty=2) | |
592 abline(v = log(0.5,base=2), col="red", lty=2) | |
593 dev.off(); | |
594 | |
595 | |
596 | |
597 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Transcript Fold-Change</font></th><th><font color=#ffcc33>Protein Fold-Change</font></th></tr>\n', file = htmloutfile, append = TRUE); | |
598 cat("<tr><td align=center>", '<img src="TE_volcano.png" width=600 height=600></td>\n', file = htmloutfile, append = TRUE); | |
599 cat("<td align=center>", | |
600 '<img src="PE_volcano.png" width=600 height=600></td></tr></table><br>\n', | |
601 file = htmloutfile, append = TRUE); | |
602 }else{ | |
603 cat('<br><br><b><font color=red>!!! No replicates found. Cannot perform test to check significance of differential expression. Thus, no Volcano plot generated !!!</font></b><br><br>', | |
604 file = htmloutfile, append = TRUE); | |
605 } | |
606 | |
607 } | 754 } |
608 | 755 |
609 | 756 |
610 #*************************************************************************************************************************************** | 757 #*************************************************************************************************************************************** |
611 # Functions: End | 758 # Functions: End |
612 #*************************************************************************************************************************************** | 759 #*************************************************************************************************************************************** |
613 | |
614 | |
615 | 760 |
616 | 761 |
617 #=============================================================================== | 762 #=============================================================================== |
618 # Arguments | 763 # Arguments |
619 #=============================================================================== | 764 #=============================================================================== |
636 volc_with = args[10]; | 781 volc_with = args[10]; |
637 | 782 |
638 htmloutfile = args[11]; # html output file | 783 htmloutfile = args[11]; # html output file |
639 outdir = args[12]; # html supporting files | 784 outdir = args[12]; # html supporting files |
640 | 785 |
641 | |
642 #=============================================================================== | 786 #=============================================================================== |
643 # Check for file existance | 787 # Check for file existance |
644 #=============================================================================== | 788 #=============================================================================== |
645 if(! file.exists(proteome_file)) | 789 if(! file.exists(proteome_file)) |
646 { | 790 { |
659 suppressPackageStartupMessages(library(dplyr)); | 803 suppressPackageStartupMessages(library(dplyr)); |
660 suppressPackageStartupMessages(library(data.table)); | 804 suppressPackageStartupMessages(library(data.table)); |
661 suppressPackageStartupMessages(library(gplots)); | 805 suppressPackageStartupMessages(library(gplots)); |
662 suppressPackageStartupMessages(library(ggplot2)); | 806 suppressPackageStartupMessages(library(ggplot2)); |
663 suppressPackageStartupMessages(library(ggfortify)); | 807 suppressPackageStartupMessages(library(ggfortify)); |
808 suppressPackageStartupMessages(library(plotly)); | |
809 suppressPackageStartupMessages(library(d3heatmap)); | |
664 | 810 |
665 #=============================================================================== | 811 #=============================================================================== |
666 # Select mode and parse experiment design file | 812 # Select mode and parse experiment design file |
667 #=============================================================================== | 813 #=============================================================================== |
668 if(mode=="multiple") | 814 if(mode=="multiple") |
669 { | 815 { |
670 expDesign = fread(sampleinfo_file, header = FALSE, stringsAsFactors = FALSE, sep="\t") %>% data.frame(); | 816 expDesign = fread(sampleinfo_file, header = FALSE, stringsAsFactors = FALSE, sep="\t") %>% data.frame(); |
671 expDesign_cc = expDesign[1:2,]; | 817 expDesign_cc = expDesign[1:2,]; |
672 | 818 |
673 sampleinfo_df = expDesign[3:nrow(expDesign),]; | 819 sampleinfo_df = expDesign[3:nrow(expDesign),]; |
674 rownames(sampleinfo_df)=1:nrow(sampleinfo_df); | 820 rownames(sampleinfo_df)=1:nrow(sampleinfo_df); |
675 colnames(sampleinfo_df) = c("Sample","Group"); | 821 colnames(sampleinfo_df) = c("Sample","Group"); |
676 | 822 |
677 condition_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),1]; | 823 condition_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),1]; |
678 condition_g_name = "case"; | 824 condition_g_name = "case"; |
679 control_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),1]; | 825 control_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),1]; |
680 control_g_name = "control"; | 826 control_g_name = "control"; |
681 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),2] = "case"; | 827 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),2] = "case"; |
682 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),2] = "control"; | 828 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),2] = "control"; |
683 sampleinfo_df_orig = sampleinfo_df; | 829 sampleinfo_df_orig = sampleinfo_df; |
684 } | 830 } |
685 | 831 |
686 if(mode=="logfold") | 832 if(mode=="logfold") |
687 { | 833 { |
688 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change")) | 834 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change")) |
689 } | 835 } |
690 | 836 |
691 #=============================================================================== | 837 #=============================================================================== |
692 # Parse Transcriptome data | 838 # Parse Transcriptome data |
693 #=============================================================================== | 839 #=============================================================================== |
694 TE_df_orig = fread(transcriptome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame(); | 840 TE_df_orig = fread(transcriptome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame(); |
695 if(mode=="multiple") | 841 if(mode=="multiple") |
696 { | 842 { |
697 TE_df = TE_df_orig[,c(colnames(TE_df_orig)[1],condition_cols,control_cols)]; | 843 TE_df = TE_df_orig[,c(colnames(TE_df_orig)[1],condition_cols,control_cols)]; |
698 } | 844 } |
699 if(mode=="logfold") | 845 if(mode=="logfold") |
700 { | 846 { |
701 TE_df = TE_df_orig; | 847 TE_df = TE_df_orig; |
702 colnames(TE_df) = c("Genes", "LogFold"); | 848 colnames(TE_df) = c("Genes", "LogFold"); |
703 } | 849 } |
704 #=============================================================================== | 850 #=============================================================================== |
705 # Parse Proteome data | 851 # Parse Proteome data |
706 #=============================================================================== | 852 #=============================================================================== |
707 PE_df_orig = fread(proteome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame(); | 853 PE_df_orig = fread(proteome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame(); |
708 if(mode=="multiple") | 854 if(mode=="multiple") |
709 { | 855 { |
710 PE_df = PE_df_orig[,c(colnames(PE_df_orig)[1],condition_cols,control_cols)]; | 856 PE_df = PE_df_orig[,c(colnames(PE_df_orig)[1],condition_cols,control_cols)]; |
711 } | 857 } |
712 if(mode=="logfold") | 858 if(mode=="logfold") |
713 { | 859 { |
714 PE_df = PE_df_orig; | 860 PE_df = PE_df_orig; |
715 colnames(PE_df) = c("Genes", "LogFold"); | 861 colnames(PE_df) = c("Genes", "LogFold"); |
716 } | 862 } |
717 | 863 |
718 #============================================================================================================= | 864 #============================================================================================================= |
719 # Create directory structures and then set the working directory to output directory | 865 # Create directory structures and then set the working directory to output directory |
720 #============================================================================================================= | 866 #============================================================================================================= |
723 dir.create(outdir); | 869 dir.create(outdir); |
724 } | 870 } |
725 #=============================================================================== | 871 #=============================================================================== |
726 # Write initial data summary in html outfile | 872 # Write initial data summary in html outfile |
727 #=============================================================================== | 873 #=============================================================================== |
728 cat("<html><head></head><body>\n", file = htmloutfile); | 874 cat("<html><head></head><body>\n", file = htmloutfile); |
729 | 875 |
730 cat("<h1><u>QuanTP: Association between abundance ratios of transcript and protein</u></h1><hr/>\n", | 876 cat("<h1><u>QuanTP: Association between abundance ratios of transcript and protein</u></h1><hr/>\n", |
731 "<font><h3>Input data summary</h3></font>\n", | 877 "<font><h3>Input data summary</h3></font>\n", |
732 "<ul>\n", | 878 "<ul>\n", |
733 "<li>Abbreviations used: PE (Proteome data) and TE (Transcriptome data)","</li><br>\n", | 879 "<li>Abbreviations used: PE (Proteome data) and TE (Transcriptome data)","</li><br>\n", |
734 "<li>Input Proteome data dimension (Row Column): ", dim(PE_df)[1]," x ", dim(PE_df)[2],"</li>\n", | 880 "<li>Input Proteome data dimension (Row Column): ", dim(PE_df)[1]," x ", dim(PE_df)[2],"</li>\n", |
735 "<li>Input Transcriptome data dimension (Row Column): ", dim(TE_df)[1]," x ", dim(TE_df)[2],"</li></ul><hr/>\n", | 881 "<li>Input Transcriptome data dimension (Row Column): ", dim(TE_df)[1]," x ", dim(TE_df)[2],"</li></ul><hr/>\n", |
736 file = htmloutfile, append = TRUE); | 882 file = htmloutfile, append = TRUE); |
737 | 883 |
738 cat("<h3 id=table_of_content>Table of Contents:</h3>\n", | 884 cat("<h3 id=table_of_content>Table of Contents:</h3>\n", |
739 "<ul>\n", | 885 "<ul>\n", |
740 "<li><a href=#sample_dist>Sample distribution</a></li>\n", | 886 "<li><a href=#sample_dist>Sample distribution</a></li>\n", |
741 "<li><a href=#corr_data>Correlation</a></li>\n", | 887 "<li><a href=#corr_data>Correlation</a></li>\n", |
742 "<li><a href=#regression_data>Regression analysis</a></li>\n", | 888 "<li><a href=#regression_data>Regression analysis</a></li>\n", |
743 "<li><a href=#inf_obs>Influential observations</a></li>\n", | 889 "<li><a href=#inf_obs>Influential observations</a></li>\n", |
791 PE_nacount = sum(is.na(PE_df)); | 937 PE_nacount = sum(is.na(PE_df)); |
792 | 938 |
793 TE_df[is.na(TE_df)] = 0; | 939 TE_df[is.na(TE_df)] = 0; |
794 PE_df[is.na(PE_df)] = 0; | 940 PE_df[is.na(PE_df)] = 0; |
795 | 941 |
942 #=============================================================================== | |
943 # Obtain JS/HTML lines for interactive visualization | |
944 #=============================================================================== | |
945 extractWidgetCode = function(outplot){ | |
946 lines <- readLines(gsub("\\.png", "\\.html", outplot)) | |
947 return(list( | |
948 'prescripts' = c('', | |
949 gsub('script', 'script', | |
950 lines[grep('<head>',lines) + 3 | |
951 :grep('</head>' ,lines) - 5]), | |
952 ''), | |
953 'widget_div' = paste('<!--', | |
954 gsub('width:100%;height:400px', | |
955 'width:500px;height:500px', | |
956 lines[grep(lines, pattern='html-widget')]), | |
957 '-->', sep=''), | |
958 'postscripts' = paste('', | |
959 gsub('script', 'script', | |
960 lines[grep(lines, pattern='<script type')]), | |
961 '', sep=''))); | |
962 } | |
963 prescripts <- list() | |
964 postscripts <- list() | |
965 | |
796 | 966 |
797 #=============================================================================== | 967 #=============================================================================== |
798 # Decide based on analysis mode | 968 # Decide based on analysis mode |
799 #=============================================================================== | 969 #=============================================================================== |
800 if(mode=="logfold") | 970 if(mode=="logfold") |
801 { | 971 { |
802 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n', | 972 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n', |
803 file = htmloutfile, append = TRUE); | 973 file = htmloutfile, append = TRUE); |
804 | 974 |
805 # TE Boxplot | 975 # TE Boxplot |
806 outplot = paste(outdir,"/Box_TE.png",sep="",collape=""); | 976 outplot = paste(outdir,"/Box_TE.png",sep="",collape=""); |
807 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n', | 977 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n', |
808 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', | 978 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', |
809 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE); | 979 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE); |
810 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance data"); | 980 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance data"); |
811 | 981 |
812 # PE Boxplot | 982 # PE Boxplot |
813 outplot = paste(outdir,"/Box_PE.png",sep="",collape=""); | 983 outplot = paste(outdir,"/Box_PE.png",sep="",collape=""); |
814 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE); | 984 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE); |
815 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance data"); | 985 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance data"); |
816 | 986 |
817 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n', | 987 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n', |
818 file = htmloutfile, append = TRUE); | 988 file = htmloutfile, append = TRUE); |
819 | 989 |
820 # TE PE scatter | 990 # TE PE scatter |
821 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape=""); | 991 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape=""); |
822 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE); | 992 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE); |
823 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800></td></tr>\n', file = htmloutfile, append = TRUE); | 993 singlesample_scatter(PE_TE_data, outplot); |
994 lines <- extractWidgetCode(outplot); | |
995 postscripts <- c(postscripts, lines$postscripts); | |
996 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800>', lines$widget_div, '</td></tr>\n', file = htmloutfile, append = TRUE); | |
824 PE_TE_data = data.frame(PE_df, TE_df); | 997 PE_TE_data = data.frame(PE_df, TE_df); |
825 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance"); | 998 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance"); |
826 singlesample_scatter(PE_TE_data, outplot); | |
827 | 999 |
828 # TE PE Cor | 1000 # TE PE Cor |
829 cat("<tr><td align=center>", file = htmloutfile, append = TRUE); | 1001 cat("<tr><td align=center>", file = htmloutfile, append = TRUE); |
830 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); | 1002 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); |
831 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n', | 1003 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n', |
832 file = htmloutfile, append = TRUE); | 1004 file = htmloutfile, append = TRUE); |
833 cat('</td></table>', | 1005 cat('</td></table>', |
834 file = htmloutfile, append = TRUE); | 1006 file = htmloutfile, append = TRUE); |
835 | 1007 |
836 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n', | 1008 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n', |
837 file = htmloutfile, append = TRUE); | 1009 file = htmloutfile, append = TRUE); |
838 | 1010 |
839 # TE PE Regression | 1011 # TE PE Regression |
840 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE); | 1012 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE); |
1013 postscripts <- c(postscripts, c(extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$postscripts, | |
1014 extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$postscripts, | |
1015 extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$postscripts, | |
1016 extractWidgetCode(paste(outdir,"/PE_TE_lm_cooksd.png",sep="",collapse=""))$postscripts, | |
1017 extractWidgetCode(paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""))$postscripts)); | |
841 | 1018 |
842 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n', | 1019 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n', |
843 file = htmloutfile, append = TRUE); | 1020 file = htmloutfile, append = TRUE); |
844 | 1021 |
845 # TE PE Heatmap | 1022 # TE PE Heatmap |
846 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust); | 1023 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust); |
1024 lines <- extractWidgetCode(paste(outdir,"/PE_TE_heatmap.png",sep="",collapse="")) | |
1025 postscripts <- c(postscripts, lines$postscripts) | |
1026 prescripts <- c(prescripts, lines$prescripts) | |
847 | 1027 |
848 | 1028 |
849 # TE PE Clustering (kmeans) | 1029 # TE PE Clustering (kmeans) |
850 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster)) | 1030 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster)) |
1031 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/PE_TE_kmeans.png",sep="",collapse=""))$postscripts) | |
851 | 1032 |
852 }else{ | 1033 }else{ |
853 if(mode=="multiple") | 1034 if(mode=="multiple") |
854 { | 1035 { |
855 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n', | 1036 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n', |
856 file = htmloutfile, append = TRUE); | 1037 file = htmloutfile, append = TRUE); |
857 | 1038 |
858 # TE Boxplot | 1039 # TE Boxplot |
859 outplot = paste(outdir,"/Box_TE_all_rep.png",sep="",collape=""); | 1040 outplot = paste(outdir,"/Box_TE_all_rep.png",sep="",collape=""); |
860 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n', | |
861 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', | |
862 "<tr><td align=center>", '<img src="Box_TE_all_rep.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE); | |
863 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)])); | 1041 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)])); |
864 colnames(temp_df_te_data) = colnames(TE_df); | 1042 colnames(temp_df_te_data) = colnames(TE_df); |
865 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance (log)"); | 1043 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance (log)"); |
1044 lines <- extractWidgetCode(outplot) | |
1045 prescripts <- c(prescripts, lines$prescripts) | |
1046 postscripts <- c(postscripts, lines$postscripts) | |
1047 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n', | |
1048 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', | |
1049 "<tr><td align=center>", file = htmloutfile, append = TRUE); | |
1050 cat('<img src="Box_TE_all_rep.png" width=500 height=500>', | |
1051 lines$widget_div, '</td>', file = htmloutfile, append = TRUE); | |
866 | 1052 |
867 # PE Boxplot | 1053 # PE Boxplot |
868 outplot = paste(outdir,"/Box_PE_all_rep.png",sep="",collape=""); | 1054 outplot = paste(outdir,"/Box_PE_all_rep.png",sep="",collape=""); |
869 cat("<td align=center>", '<img src="Box_PE_all_rep.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE); | |
870 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)])); | 1055 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)])); |
871 colnames(temp_df_pe_data) = colnames(PE_df); | 1056 colnames(temp_df_pe_data) = colnames(PE_df); |
872 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance (log)"); | 1057 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance (log)"); |
1058 lines <- extractWidgetCode(outplot) | |
1059 #prescripts <- c(prescripts, lines$prescripts) | |
1060 postscripts <- c(postscripts, lines$postscripts) | |
1061 cat("<td align=center>", '<img src="Box_PE_all_rep.png" width=500 height=500>', | |
1062 lines$widget_div, '</td></tr></table>\n', file = htmloutfile, append = TRUE); | |
873 | 1063 |
874 # Calc TE PCA | 1064 # Calc TE PCA |
875 outplot = paste(outdir,"/PCA_TE_all_rep.png",sep="",collape=""); | 1065 outplot = paste(outdir,"/PCA_TE_all_rep.png",sep="",collape=""); |
876 multisample_PCA(TE_df, sampleinfo_df, outplot); | 1066 multisample_PCA(TE_df, sampleinfo_df, outplot); |
877 | 1067 PCA_TE <- extractWidgetCode(outplot) |
1068 postscripts <- c(postscripts, PCA_TE$postscripts) | |
1069 | |
878 # Calc PE PCA | 1070 # Calc PE PCA |
879 outplot = paste(outdir,"/PCA_PE_all_rep.png",sep="",collape=""); | 1071 outplot = paste(outdir,"/PCA_PE_all_rep.png",sep="",collape=""); |
880 multisample_PCA(PE_df, sampleinfo_df, outplot); | 1072 multisample_PCA(PE_df, sampleinfo_df, outplot); |
881 | 1073 PCA_PE <- extractWidgetCode(outplot) |
1074 postscripts <- c(postscripts, PCA_PE$postscripts) | |
882 | 1075 |
883 # Replicate mode | 1076 # Replicate mode |
884 templist = mergeReplicates(TE_df,PE_df, sampleinfo_df, method); | 1077 templist = mergeReplicates(TE_df,PE_df, sampleinfo_df, method); |
885 TE_df = templist$TE_df_merged; | 1078 TE_df = templist$TE_df_merged; |
886 PE_df = templist$PE_df_merged; | 1079 PE_df = templist$PE_df_merged; |
887 sampleinfo_df = templist$sampleinfo_df_merged; | 1080 sampleinfo_df = templist$sampleinfo_df_merged; |
888 rownames(sampleinfo_df) = sampleinfo_df[,1]; | 1081 rownames(sampleinfo_df) = sampleinfo_df[,1]; |
889 | 1082 |
890 # TE Boxplot | 1083 # TE Boxplot |
891 outplot = paste(outdir,"/Box_TE_rep.png",sep="",collape=""); | 1084 outplot = paste(outdir,"/Box_TE_rep.png",sep="",collape=""); |
892 cat('<br><font color="#ff0000"><h3>Sample wise distribution (Box plot) after using ',method,' on replicates </h3></font><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', | |
893 "<tr><td align=center>", '<img src="Box_TE_rep.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE); | |
894 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)])); | 1085 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)])); |
895 colnames(temp_df_te_data) = colnames(TE_df); | 1086 colnames(temp_df_te_data) = colnames(TE_df); |
896 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Transcript Abundance (log)"); | 1087 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Transcript Abundance (log)"); |
897 | 1088 lines <- extractWidgetCode(outplot) |
1089 #prescripts <- c(prescripts, lines$prescripts) | |
1090 postscripts <- c(postscripts, lines$postscripts) | |
1091 cat('<br><font color="#ff0000"><h3>Sample wise distribution (Box plot) after using ',method,' on replicates </h3></font><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', | |
1092 "<tr><td align=center>", '<img src="Box_TE_rep.png" width=500 height=500>', lines$widget_div, '</td>\n', file = htmloutfile, append = TRUE); | |
1093 | |
898 # PE Boxplot | 1094 # PE Boxplot |
899 outplot = paste(outdir,"/Box_PE_rep.png",sep="",collape=""); | 1095 outplot = paste(outdir,"/Box_PE_rep.png",sep="",collape=""); |
900 cat("<td align=center>", '<img src="Box_PE_rep.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE); | |
901 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)])); | 1096 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)])); |
902 colnames(temp_df_pe_data) = colnames(PE_df); | 1097 colnames(temp_df_pe_data) = colnames(PE_df); |
903 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Protein Abundance (log)"); | 1098 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Protein Abundance (log)"); |
904 | 1099 lines <- extractWidgetCode(outplot) |
1100 #prescripts <- c(prescripts, lines$prescripts) | |
1101 postscripts <- c(postscripts, lines$postscripts) | |
1102 cat("<td align=center>", '<img src="Box_PE_rep.png" width=500 height=500>', lines$widget_div, '</td></tr></table>\n', file = htmloutfile, append = TRUE); | |
1103 | |
905 #=============================================================================== | 1104 #=============================================================================== |
906 # Calculating log fold change and running the "single" code part | 1105 # Calculating log fold change and running the "single" code part |
907 #=============================================================================== | 1106 #=============================================================================== |
908 | 1107 |
909 TE_df = data.frame("Genes"=TE_df[,1], "LogFold"=apply(TE_df[,c(which(colnames(TE_df)==condition_g_name),which(colnames(TE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2))); | 1108 TE_df = data.frame("Genes"=TE_df[,1], "LogFold"=apply(TE_df[,c(which(colnames(TE_df)==condition_g_name),which(colnames(TE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2))); |
910 PE_df = data.frame("Genes"=PE_df[,1], "LogFold"=apply(PE_df[,c(which(colnames(PE_df)==condition_g_name),which(colnames(PE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2))); | 1109 PE_df = data.frame("Genes"=PE_df[,1], "LogFold"=apply(PE_df[,c(which(colnames(PE_df)==condition_g_name),which(colnames(PE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2))); |
911 | 1110 |
912 #=============================================================================== | 1111 #=============================================================================== |
913 # Treat missing values | 1112 # Treat missing values |
914 #=============================================================================== | 1113 #=============================================================================== |
915 | 1114 |
916 TE_df[is.infinite(TE_df[,2]),2] = NA; | 1115 TE_df[is.infinite(TE_df[,2]),2] = NA; |
917 PE_df[is.infinite(PE_df[,2]),2] = NA; | 1116 PE_df[is.infinite(PE_df[,2]),2] = NA; |
918 TE_df[is.na(TE_df)] = 0; | 1117 TE_df[is.na(TE_df)] = 0; |
919 PE_df[is.na(PE_df)] = 0; | 1118 PE_df[is.na(PE_df)] = 0; |
920 | 1119 |
921 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change")) | 1120 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change")) |
922 #=============================================================================== | 1121 #=============================================================================== |
923 # Find common samples | 1122 # Find common samples |
924 #=============================================================================== | 1123 #=============================================================================== |
925 | 1124 |
926 common_samples = intersect(sampleinfo_df[,1], colnames(TE_df)[-1]) %>% intersect(., colnames(PE_df)[-1]); | 1125 common_samples = intersect(sampleinfo_df[,1], colnames(TE_df)[-1]) %>% intersect(., colnames(PE_df)[-1]); |
927 TE_df = select(TE_df, 1, common_samples); | 1126 TE_df = select(TE_df, 1, common_samples); |
928 PE_df = select(PE_df, 1, common_samples); | 1127 PE_df = select(PE_df, 1, common_samples); |
929 sampleinfo_df = filter(sampleinfo_df, Sample %in% common_samples); | 1128 sampleinfo_df = filter(sampleinfo_df, Sample %in% common_samples); |
930 rownames(sampleinfo_df) = sampleinfo_df[,1]; | 1129 rownames(sampleinfo_df) = sampleinfo_df[,1]; |
931 | 1130 |
932 # TE Boxplot | 1131 # TE Boxplot |
933 outplot = paste(outdir,"/Box_TE.png",sep="",collape=""); | 1132 outplot = paste(outdir,"/Box_TE.png",sep="",collape=""); |
1133 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Transcript Abundance fold-change (log2)"); | |
1134 lines <- extractWidgetCode(outplot) | |
1135 postscripts <- c(postscripts, lines$postscripts) | |
934 cat('<br><font color="#ff0000"><h3>Distribution (Box plot) of log fold change </h3></font>', file = htmloutfile, append = TRUE); | 1136 cat('<br><font color="#ff0000"><h3>Distribution (Box plot) of log fold change </h3></font>', file = htmloutfile, append = TRUE); |
935 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', | 1137 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', |
936 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE); | 1138 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500>', lines$widget_div, '</td>\n', file = htmloutfile, append = TRUE); |
937 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Transcript Abundance fold-change (log2)"); | |
938 | 1139 |
939 # PE Boxplot | 1140 # PE Boxplot |
940 outplot = paste(outdir,"/Box_PE.png",sep="",collape=""); | 1141 outplot = paste(outdir,"/Box_PE.png",sep="",collape=""); |
941 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE); | |
942 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Protein Abundance fold-change(log2)"); | 1142 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Protein Abundance fold-change(log2)"); |
1143 lines <- extractWidgetCode(outplot) | |
1144 postscripts <- c(postscripts, lines$postscripts) | |
1145 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500>', lines$widget_div,'</td></tr></table>\n', file = htmloutfile, append = TRUE); | |
943 | 1146 |
944 | 1147 |
945 # Log Fold Data | 1148 # Log Fold Data |
946 perform_Test_Volcano(TE_df_orig,PE_df_orig,TE_df, PE_df,sampleinfo_df_orig,method,correction_method,volc_with) | 1149 perform_Test_Volcano(TE_df_orig,PE_df_orig,TE_df, PE_df,sampleinfo_df_orig,method,correction_method,volc_with) |
947 | 1150 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/TE_volcano.png",sep="",collapse=""))$postscripts) |
948 | 1151 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/PE_volcano.png",sep="",collapse=""))$postscripts) |
949 | 1152 |
950 # Print PCA | 1153 # Print PCA |
951 | 1154 |
952 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>PCA plot: Transcriptome data</font></th><th><font color=#ffcc33>PCA plot: Proteome data</font></th></tr>\n', | 1155 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>PCA plot: Transcriptome data</font></th><th><font color=#ffcc33>PCA plot: Proteome data</font></th></tr>\n', |
953 "<tr><td align=center>", '<img src="PCA_TE_all_rep.png" width=500 height=500></td>\n', | 1156 "<tr><td align=center>", '<img src="PCA_TE_all_rep.png" width=500 height=500>', PCA_TE$widget_div, '</td>\n', |
954 "<td align=center>", '<img src="PCA_PE_all_rep.png" width=500 height=500></td></tr></table>\n', | 1157 "<td align=center>", '<img src="PCA_PE_all_rep.png" width=500 height=500>', PCA_PE$widget_div, '</td></tr></table>\n', |
955 file = htmloutfile, append = TRUE); | 1158 file = htmloutfile, append = TRUE); |
956 | 1159 |
957 | 1160 |
958 | 1161 |
959 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n', | 1162 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n', |
960 file = htmloutfile, append = TRUE); | 1163 file = htmloutfile, append = TRUE); |
1164 | |
1165 PE_TE_data = data.frame(PE_df, TE_df); | |
1166 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance"); | |
961 | 1167 |
962 # TE PE scatter | 1168 # TE PE scatter |
963 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape=""); | 1169 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape=""); |
964 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE); | 1170 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE); |
965 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800></td>\n', file = htmloutfile, append = TRUE); | |
966 PE_TE_data = data.frame(PE_df, TE_df); | |
967 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance"); | |
968 singlesample_scatter(PE_TE_data, outplot); | 1171 singlesample_scatter(PE_TE_data, outplot); |
969 | 1172 lines <- extractWidgetCode(outplot); |
1173 postscripts <- c(postscripts, lines$postscripts); | |
1174 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800>', gsub('width:500px;height:500px', 'width:800px;height:800px' , lines$widget_div), | |
1175 '</td>\n', file = htmloutfile, append = TRUE); | |
1176 | |
970 # TE PE Cor | 1177 # TE PE Cor |
971 cat("<tr><td align=center>\n", file = htmloutfile, append = TRUE); | 1178 cat("<tr><td align=center>\n", file = htmloutfile, append = TRUE); |
972 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); | 1179 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); |
973 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n', | 1180 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n', |
974 file = htmloutfile, append = TRUE); | 1181 file = htmloutfile, append = TRUE); |
975 cat('</td></table>', | 1182 cat('</td></table>', |
976 file = htmloutfile, append = TRUE); | 1183 file = htmloutfile, append = TRUE); |
977 | 1184 |
978 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n', | 1185 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n', |
979 file = htmloutfile, append = TRUE); | 1186 file = htmloutfile, append = TRUE); |
980 | 1187 |
981 # TE PE Regression | 1188 # TE PE Regression |
982 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE); | 1189 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE); |
1190 postscripts <- c(postscripts, c(extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$postscripts, | |
1191 extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$postscripts, | |
1192 extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$postscripts, | |
1193 extractWidgetCode(paste(outdir,"/PE_TE_lm_cooksd.png",sep="",collapse=""))$postscripts, | |
1194 extractWidgetCode(paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""))$postscripts, | |
1195 gsub('data-for="html', 'data-for="secondhtml"', | |
1196 extractWidgetCode(paste(outdir,"/TE_PE_scatter.png",sep="",collapse=""))$postscripts))); | |
983 | 1197 |
984 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n', | 1198 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n', |
985 file = htmloutfile, append = TRUE); | 1199 file = htmloutfile, append = TRUE); |
986 | 1200 |
987 #TE PE Heatmap | 1201 #TE PE Heatmap |
988 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust); | 1202 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust); |
1203 lines <- extractWidgetCode(paste(outdir,"/PE_TE_heatmap.png",sep="",collapse="")) | |
1204 postscripts <- c(postscripts, lines$postscripts) | |
1205 prescripts <- c(prescripts, lines$prescripts) | |
989 | 1206 |
990 #TE PE Clustering (kmeans) | 1207 #TE PE Clustering (kmeans) |
991 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster)) | 1208 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster)) |
992 | 1209 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/PE_TE_kmeans.png",sep="",collapse=""))$postscripts); |
993 } | 1210 } |
994 } | 1211 } |
995 cat("<h3>Go To:</h3>\n", | 1212 cat("<h3>Go To:</h3>\n", |
996 "<ul>\n", | 1213 "<ul>\n", |
997 "<li><a href=#sample_dist>Sample distribution</a></li>\n", | 1214 "<li><a href=#sample_dist>Sample distribution</a></li>\n", |
1000 "<li><a href=#inf_obs>Influential observations</a></li>\n", | 1217 "<li><a href=#inf_obs>Influential observations</a></li>\n", |
1001 "<li><a href=#cluster_data>Cluster analysis</a></li></ul>\n", | 1218 "<li><a href=#cluster_data>Cluster analysis</a></li></ul>\n", |
1002 "<br><a href=#>TOP</a>", | 1219 "<br><a href=#>TOP</a>", |
1003 file = htmloutfile, append = TRUE); | 1220 file = htmloutfile, append = TRUE); |
1004 cat("</body></html>\n", file = htmloutfile, append = TRUE); | 1221 cat("</body></html>\n", file = htmloutfile, append = TRUE); |
1222 | |
1223 | |
1224 #=============================================================================== | |
1225 # Add masked-javascripts tags to HTML file in the head and end | |
1226 #=============================================================================== | |
1227 | |
1228 htmllines <- readLines(htmloutfile) | |
1229 htmllines[1] <- paste('<html>\n<head>\n', paste(prescripts, collapse='\n'), '\n</head>\n<body>') | |
1230 cat(paste(htmllines, collapse='\n'), file = htmloutfile) | |
1231 cat('\n', paste(postscripts, collapse='\n'), "\n", | |
1232 "</body>\n</html>\n", file = htmloutfile, append = TRUE); |