comparison quantp.r @ 1:bcc7a4c4cc29 draft

planemo upload commit 1887dff812162880d66b003a927867cd5000c98f
author galaxyp
date Thu, 20 Dec 2018 16:06:05 -0500
parents 75faf9a89f5b
children ed0bb50d7ffe
comparison
equal deleted inserted replaced
0:75faf9a89f5b 1:bcc7a4c4cc29
14 tempdf[is.na(tempdf)] = 0; 14 tempdf[is.na(tempdf)] = 0;
15 tempdf$Group = tempgrp; 15 tempdf$Group = tempgrp;
16 png(outfile, width = 6, height = 6, units = 'in', res=300); 16 png(outfile, width = 6, height = 6, units = 'in', res=300);
17 # bitmap(outfile, "png16m"); 17 # bitmap(outfile, "png16m");
18 g = autoplot(prcomp(select(tempdf, -Group)), data = tempdf, colour = 'Group', size=3); 18 g = autoplot(prcomp(select(tempdf, -Group)), data = tempdf, colour = 'Group', size=3);
19 saveWidget(ggplotly(g), file.path(gsub("\\.png", "\\.html", outplot)))
19 plot(g); 20 plot(g);
20 dev.off(); 21 dev.off();
21 } 22 }
22 23
23 #=============================================================================== 24 #===============================================================================
28 rownames(PE_TE_data) = PE_TE_data$PE_ID; 29 rownames(PE_TE_data) = PE_TE_data$PE_ID;
29 regmodel = lm(PE_abundance~TE_abundance, data=PE_TE_data); 30 regmodel = lm(PE_abundance~TE_abundance, data=PE_TE_data);
30 regmodel_summary = summary(regmodel); 31 regmodel_summary = summary(regmodel);
31 32
32 cat("<font><h3>Linear Regression model fit between Proteome and Transcriptome data</h3></font>\n", 33 cat("<font><h3>Linear Regression model fit between Proteome and Transcriptome data</h3></font>\n",
33 "<p>Assuming a linear relationship between Proteome and Transcriptome data, we here fit a linear regression model.</p>\n", 34 "<p>Assuming a linear relationship between Proteome and Transcriptome data, we here fit a linear regression model.</p>\n",
34 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n', 35 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n',
35 file = htmloutfile, append = TRUE); 36 file = htmloutfile, append = TRUE);
36 37
37 cat("<tr><td>Formula</td><td>","PE_abundance~TE_abundance","</td></tr>\n", 38 cat("<tr><td>Formula</td><td>","PE_abundance~TE_abundance","</td></tr>\n",
38 "<tr><td colspan='2' align='center'> <b>Coefficients</b></td>","</tr>\n", 39 "<tr><td colspan='2' align='center'> <b>Coefficients</b></td>","</tr>\n",
39 "<tr><td>",names(regmodel$coefficients[1]),"</td><td>",regmodel$coefficients[1]," (Pvalue:", regmodel_summary$coefficients[1,4],")","</td></tr>\n", 40 "<tr><td>",names(regmodel$coefficients[1]),"</td><td>",regmodel$coefficients[1]," (Pvalue:", regmodel_summary$coefficients[1,4],")","</td></tr>\n",
40 "<tr><td>",names(regmodel$coefficients[2]),"</td><td>",regmodel$coefficients[2]," (Pvalue:", regmodel_summary$coefficients[2,4],")","</td></tr>\n", 41 "<tr><td>",names(regmodel$coefficients[2]),"</td><td>",regmodel$coefficients[2]," (Pvalue:", regmodel_summary$coefficients[2,4],")","</td></tr>\n",
41 "<tr><td colspan='2' align='center'> <b>Model parameters</b></td>","</tr>\n", 42 "<tr><td colspan='2' align='center'> <b>Model parameters</b></td>","</tr>\n",
42 "<tr><td>Residual standard error</td><td>",regmodel_summary$sigma," (",regmodel_summary$df[2]," degree of freedom)</td></tr>\n", 43 "<tr><td>Residual standard error</td><td>",regmodel_summary$sigma," (",regmodel_summary$df[2]," degree of freedom)</td></tr>\n",
43 "<tr><td>F-statistic</td><td>",regmodel_summary$fstatistic[1]," ( on ",regmodel_summary$fstatistic[2]," and ",regmodel_summary$fstatistic[3]," degree of freedom)</td></tr>\n", 44 "<tr><td>F-statistic</td><td>",regmodel_summary$fstatistic[1]," ( on ",regmodel_summary$fstatistic[2]," and ",regmodel_summary$fstatistic[3]," degree of freedom)</td></tr>\n",
44 "<tr><td>R-squared</td><td>",regmodel_summary$r.squared,"</td></tr>\n", 45 "<tr><td>R-squared</td><td>",regmodel_summary$r.squared,"</td></tr>\n",
45 "<tr><td>Adjusted R-squared</td><td>",regmodel_summary$adj.r.squared,"</td></tr>\n", 46 "<tr><td>Adjusted R-squared</td><td>",regmodel_summary$adj.r.squared,"</td></tr>\n",
46 file = htmloutfile, append = TRUE); 47 file = htmloutfile, append = TRUE);
47 48
48 cat("</table>\n", file = htmloutfile, append = TRUE); 49 cat("</table>\n", file = htmloutfile, append = TRUE);
49 50
50 cat( 51 cat(
51 "<font color='#ff0000'><h3>Regression and diagnostics plots</h3></font>\n", 52 "<font color='#ff0000'><h3>Regression and diagnostics plots</h3></font>\n",
56 # bitmap(outplot, "png16m"); 57 # bitmap(outplot, "png16m");
57 par(mfrow=c(1,1)); 58 par(mfrow=c(1,1));
58 plot(regmodel, 1, cex.lab=1.5); 59 plot(regmodel, 1, cex.lab=1.5);
59 dev.off(); 60 dev.off();
60 61
62 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[1]] +
63 geom_point(aes(text=sprintf("Residual: %.2f<br>Fitted value: %.2f<br>Gene: %s", .fitted, .resid, PE_TE_data$PE_ID)),
64 shape = 1, size = .1, stroke = .2) +
65 theme_light())
66 saveWidget(ggplotly(g, tooltip= c("text")), file.path(gsub("\\.png", "\\.html", outplot)))
67
61 outplot = paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""); 68 outplot = paste(outdir,"/PE_TE_lm_2.png",sep="",collapse="");
62 png(outplot,width = 10, height = 10, units = 'in', res=300); 69 png(outplot,width = 10, height = 10, units = 'in', res=300);
63 # bitmap(outplot, "png16m"); 70 # bitmap(outplot, "png16m");
64 par(mfrow=c(1,1)); 71 par(mfrow=c(1,1));
65 plot(regmodel, 2, cex.lab=1.5); 72 g <- plot(regmodel, 2, cex.lab=1.5);
73 ggplotly(g)
66 dev.off(); 74 dev.off();
75
76 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[2]] +
77 geom_point(aes(text=sprintf("Standarized residual: %.2f<br>Theoretical quantile: %.2f<br>Gene: %s", .qqx, .qqy, PE_TE_data$PE_ID)),
78 shape = 1, size = .1) +
79 theme_light())
80 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot)))
81
67 82
68 outplot = paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""); 83 outplot = paste(outdir,"/PE_TE_lm_5.png",sep="",collapse="");
69 png(outplot, width = 10, height = 10, units = 'in',res=300); 84 png(outplot, width = 10, height = 10, units = 'in',res=300);
70 # bitmap(outplot, "png16m"); 85 # bitmap(outplot, "png16m");
71 par(mfrow=c(1,1)); 86 par(mfrow=c(1,1));
72 plot(regmodel, 5, cex.lab=1.5); 87 plot(regmodel, 5, cex.lab=1.5);
73 dev.off(); 88 dev.off();
74 89
90 cd_cont_pos <- function(leverage, level, model) {sqrt(level*length(coef(model))*(1-leverage)/leverage)}
91 cd_cont_neg <- function(leverage, level, model) {-cd_cont_pos(leverage, level, model)}
92
93 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[4]] +
94 aes(label = PE_TE_data$PE_ID) +
95 geom_point(aes(text=sprintf("Leverage: %.2f<br>Standardized residual: %.2f<br>Gene: %s", .hat, .stdresid, PE_TE_data$PE_ID))) +
96 theme_light())
97 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot)))
98
75 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE); 99 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE);
76 100
77 cat( 101 cat(
78 '<tr bgcolor="#7a0019"><th>', "<font color='#ffcc33'><h4>1) <u>Residuals vs Fitted plot</h4></font></u></th>\n", 102 '<tr bgcolor="#7a0019"><th>', "<font color='#ffcc33'><h4>1) <u>Residuals vs Fitted plot</h4></font></u></th>\n",
79 '<th><font color=#ffcc33><h4>2) <u>Normal Q-Q plot of residuals</h4></font></u></th></tr>\n', 103 '<th><font color=#ffcc33><h4>2) <u>Normal Q-Q plot of residuals</h4></font></u></th></tr>\n',
80 file = htmloutfile, append = TRUE); 104 file = htmloutfile, append = TRUE);
81 105
82 cat( 106 cat(
83 '<tr><td align=center><img src="PE_TE_lm_1.png" width=600 height=600></td><td align=center><img src="PE_TE_lm_2.png" width=600 height=600></td></tr>\n', 107 '<tr><td align=center><img src="PE_TE_lm_1.png" width=600 height=600>',
84 file = htmloutfile, append = TRUE); 108 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$widget_div),
109 '</td><td align=center><img src="PE_TE_lm_2.png" width=600 height=600>',
110 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$widget_div),
111 '</td></tr>\n', file = htmloutfile, append = TRUE);
85 112
86 cat( 113 cat(
87 '<tr><td align=center>This plot checks for linear relationship assumptions.<br>If a horizontal line is observed without any distinct patterns, it indicates a linear relationship.</td>\n', 114 '<tr><td align=center>This plot checks for linear relationship assumptions.<br>If a horizontal line is observed without any distinct patterns, it indicates a linear relationship.</td>\n',
88 '<td align=center>This plot checks whether residuals are normally distributed or not.<br>It is good if the residuals points follow the straight dashed line i.e., do not deviate much from dashed line.</td></tr></table>\n', 115 '<td align=center>This plot checks whether residuals are normally distributed or not.<br>It is good if the residuals points follow the straight dashed line i.e., do not deviate much from dashed line.</td></tr></table>\n',
89 file = htmloutfile, append = TRUE); 116 file = htmloutfile, append = TRUE);
90 117
91 118
92 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 119 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
93 # Residuals data 120 # Residuals data
94 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 121 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
95 res_all = regmodel$residuals; 122 res_all = regmodel$residuals;
96 res_mean = mean(res_all); 123 res_mean = mean(res_all);
97 res_sd = sd(res_all); 124 res_sd = sd(res_all);
98 res_diff = (res_all-res_mean); 125 res_diff = (res_all-res_mean);
99 res_zscore = res_diff/res_sd; 126 res_zscore = res_diff/res_sd;
100 # res_outliers = res_all[which((res_zscore > 2)|(res_zscore < -2))] 127 # res_outliers = res_all[which((res_zscore > 2)|(res_zscore < -2))]
101 128
102 129
103 tempind = which((res_zscore > 2)|(res_zscore < -2)); 130 tempind = which((res_zscore > 2)|(res_zscore < -2));
104 res_PE_TE_data_no_outlier = PE_TE_data[-tempind,]; 131 res_PE_TE_data_no_outlier = PE_TE_data[-tempind,];
105 res_PE_TE_data_no_outlier$residuals = res_all[-tempind]; 132 res_PE_TE_data_no_outlier$residuals = res_all[-tempind];
106 res_PE_TE_data_outlier = PE_TE_data[tempind,]; 133 res_PE_TE_data_outlier = PE_TE_data[tempind,];
107 res_PE_TE_data_outlier$residuals = res_all[tempind]; 134 res_PE_TE_data_outlier$residuals = res_all[tempind];
108 135
109 # Save the complete table for download (influential_observations) 136 # Save the complete table for download (influential_observations)
110 temp_outlier_data = data.frame(res_PE_TE_data_outlier$PE_ID, res_PE_TE_data_outlier$TE_abundance, res_PE_TE_data_outlier$PE_abundance, res_PE_TE_data_outlier$residuals) 137 temp_outlier_data = data.frame(res_PE_TE_data_outlier$PE_ID, res_PE_TE_data_outlier$TE_abundance, res_PE_TE_data_outlier$PE_abundance, res_PE_TE_data_outlier$residuals)
111 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value") 138 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value")
112 outdatafile = paste(outdir,"/PE_TE_outliers_residuals.txt", sep="", collapse=""); 139 outdatafile = paste(outdir,"/PE_TE_outliers_residuals.txt", sep="", collapse="");
113 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); 140 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F);
114 141
115 142
116 # Save the complete table for download (non influential_observations) 143 # Save the complete table for download (non influential_observations)
117 temp_all_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, res_all) 144 temp_all_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, res_all)
118 colnames(temp_all_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value") 145 colnames(temp_all_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value")
119 outdatafile = paste(outdir,"/PE_TE_abundance_residuals.txt", sep="", collapse=""); 146 outdatafile = paste(outdir,"/PE_TE_abundance_residuals.txt", sep="", collapse="");
120 write.table(temp_all_data, file=outdatafile, row.names=F, sep="\t", quote=F); 147 write.table(temp_all_data, file=outdatafile, row.names=F, sep="\t", quote=F);
121 148
122 149
123 cat('<br><h2 id="inf_obs"><font color=#ff0000>Outliers based on the residuals from regression analysis</font></h2>\n', 150 cat('<br><h2 id="inf_obs"><font color=#ff0000>Outliers based on the residuals from regression analysis</font></h2>\n',
124 file = htmloutfile, append = TRUE); 151 file = htmloutfile, append = TRUE);
125 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n', 152 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n',
126 '<tr bgcolor="#7a0019"><th colspan=2><font color=#ffcc33>Residuals from Regression</font></th></tr>\n', 153 '<tr bgcolor="#7a0019"><th colspan=2><font color=#ffcc33>Residuals from Regression</font></th></tr>\n',
127 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n', 154 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n',
128 file = htmloutfile, append = TRUE); 155 file = htmloutfile, append = TRUE);
129 156
130 cat("<tr><td>Mean Residual value</td><td>",res_mean,"</td></tr>\n", 157 cat("<tr><td>Mean Residual value</td><td>",res_mean,"</td></tr>\n",
131 "<tr><td>Standard deviation (Residuals)</td><td>",res_sd,"</td></tr>\n", 158 "<tr><td>Standard deviation (Residuals)</td><td>",res_sd,"</td></tr>\n",
132 '<tr><td>Total outliers (Residual value > 2 standard deviation from the mean)</td><td>',length(tempind),' <font size=4>(<b><a href=PE_TE_outliers_residuals.txt target="_blank">Download these ',length(tempind),' data points with high residual values here</a></b>)</font></td>\n', 159 '<tr><td>Total outliers (Residual value > 2 standard deviation from the mean)</td><td>',length(tempind),' <font size=4>(<b><a href="PE_TE_outliers_residuals.txt" target="_blank">Download these ',length(tempind),' data points with high residual values here</a></b>)</font></td>\n',
133 '<tr><td colspan=2 align=center><font size=4>(<b><a href=PE_TE_abundance_residuals.txt target="_blank">Download the complete residuals data here</a></b>)</font></td></td>\n', 160 '<tr><td colspan=2 align=center>',
134 "</table><br><br>\n", 161 '<font size=4>(<b><a href="PE_TE_abundance_residuals.txt" target="_blank">Download the complete residuals data here</a></b>)</font>',
135 file = htmloutfile, append = TRUE); 162 "</td></tr>\n</table><br><br>\n",
136 163 file = htmloutfile, append = TRUE);
164
137 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 165 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
138 166
139 167
140 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE); 168 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE);
141 169
142 cat( 170 cat(
143 '<tr bgcolor="#7a0019"><th><font color=#ffcc33><h4>3) <u>Residuals vs Leverage plot</h4></font></u></th></tr>\n', 171 '<tr bgcolor="#7a0019"><th><font color=#ffcc33><h4>3) <u>Residuals vs Leverage plot</h4></font></u></th></tr>\n',
144 file = htmloutfile, append = TRUE); 172 file = htmloutfile, append = TRUE);
145 173
146 cat( 174 cat(
147 '<tr><td align=center><img src="PE_TE_lm_5.png" width=600 height=600></td></tr>\n', 175 '<tr><td align=center><img src="PE_TE_lm_5.png" width=600 height=600>',
176 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$widget_div)
177 , '</td></tr>\n',
148 file = htmloutfile, append = TRUE); 178 file = htmloutfile, append = TRUE);
149 179
150 cat( 180 cat(
151 '<tr><td align=center>This plot is useful to identify any influential cases, that is outliers or extreme values.<br>They might influence the regression results upon inclusion or exclusion from the analysis.</td></tr></table><br>\n', 181 '<tr><td align=center>This plot is useful to identify any influential cases, that is outliers or extreme values.<br>They might influence the regression results upon inclusion or exclusion from the analysis.</td></tr></table><br>\n',
152 file = htmloutfile, append = TRUE); 182 file = htmloutfile, append = TRUE);
155 185
156 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 186 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
157 # Cook's Distance 187 # Cook's Distance
158 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 188 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
159 cat('<hr/><h2 id="inf_obs"><font color=#ff0000>INFLUENTIAL OBSERVATIONS</font></h2>\n', 189 cat('<hr/><h2 id="inf_obs"><font color=#ff0000>INFLUENTIAL OBSERVATIONS</font></h2>\n',
160 file = htmloutfile, append = TRUE); 190 file = htmloutfile, append = TRUE);
161 cat( 191 cat(
162 '<p><b>Cook\'s distance</b> computes the influence of each data point/observation on the predicted outcome. i.e. this measures how much the observation is influencing the fitted values.<br>In general use, those observations that have a <b>Cook\'s distance > than ', cookdist_upper_cutoff,' times the mean</b> may be classified as <b>influential.</b></p>\n', 192 '<p><b>Cook\'s distance</b> computes the influence of each data point/observation on the predicted outcome. i.e. this measures how much the observation is influencing the fitted values.<br>In general use, those observations that have a <b>Cook\'s distance > than ', cookdist_upper_cutoff,' times the mean</b> may be classified as <b>influential.</b></p>\n',
163 file = htmloutfile, append = TRUE); 193 file = htmloutfile, append = TRUE);
164 194
165 cooksd <- cooks.distance(regmodel); 195 cooksd <- cooks.distance(regmodel);
175 points(sel_nonoutlier, cooksd[sel_nonoutlier],pch="*", cex=2, cex.lab=1.5, col="black") 205 points(sel_nonoutlier, cooksd[sel_nonoutlier],pch="*", cex=2, cex.lab=1.5, col="black")
176 abline(h = as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T), col="red") # add cutoff line 206 abline(h = as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T), col="red") # add cutoff line
177 #text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T),names(cooksd),""), col="red", pos=2) # add labels 207 #text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T),names(cooksd),""), col="red", pos=2) # add labels
178 dev.off(); 208 dev.off();
179 209
210 cooksd_df <- data.frame(cooksd)
211 cooksd_df$genes <- row.names(cooksd_df)
212 cooksd_df$index <- 1:nrow(cooksd_df)
213 cooksd_df$colors <- "black"
214 cutoff <- as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T)
215 cooksd_df[cooksd_df$cooksd > cutoff,]$colors <- "red"
216
217 g <- ggplot(cooksd_df, aes(x = index, y = cooksd, label = row.names(cooksd_df), color=as.factor(colors),
218 text=sprintf("Gene: %s<br>Cook's Distance: %.3f", row.names(cooksd_df), cooksd))) +
219 ggtitle("Influential Obs. by Cook's distance") + xlab("Observations") + ylab("Cook's Distance") +
220 #xlim(0, 3000) + ylim(0, .15) +
221 scale_shape_discrete(solid=F) +
222 geom_point(size = 2, shape = 8) +
223 geom_hline(yintercept = cutoff,
224 linetype = "dashed", color = "red") +
225 scale_color_manual(values = c("black" = "black", "red" = "red")) +
226 theme_light() + theme(legend.position="none")
227 saveWidget(ggplotly(g, tooltip= "text"), file.path(gsub("\\.png", "\\.html", outplot)))
228
180 cat( 229 cat(
181 '<img src="PE_TE_lm_cooksd.png" width=800 height=800>', 230 '<img src="PE_TE_lm_cooksd.png" width=800 height=800>',
231 gsub("width:500px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div),
182 '<br>In the above plot, observations above red line (',cookdist_upper_cutoff,' * mean Cook\'s distance) are influential. Genes that are outliers could be important. These observations influences the correlation values and regression coefficients<br><br>', 232 '<br>In the above plot, observations above red line (',cookdist_upper_cutoff,' * mean Cook\'s distance) are influential. Genes that are outliers could be important. These observations influences the correlation values and regression coefficients<br><br>',
183 file = htmloutfile, append = TRUE); 233 file = htmloutfile, append = TRUE);
184 234
185 tempind = which(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T)); 235 tempind = which(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T));
186 PE_TE_data_no_outlier = PE_TE_data[-tempind,]; 236 PE_TE_data_no_outlier = PE_TE_data[-tempind,];
187 PE_TE_data_no_outlier$cooksd = cooksd[-tempind]; 237 PE_TE_data_no_outlier$cooksd = cooksd[-tempind];
188 PE_TE_data_outlier = PE_TE_data[tempind,]; 238 PE_TE_data_outlier = PE_TE_data[tempind,];
189 PE_TE_data_outlier$cooksd = cooksd[tempind]; 239 PE_TE_data_outlier$cooksd = cooksd[tempind];
193 cat( 243 cat(
194 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n', 244 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n',
195 file = htmloutfile, append = TRUE); 245 file = htmloutfile, append = TRUE);
196 246
197 # Save the complete table for download (influential_observations) 247 # Save the complete table for download (influential_observations)
198 temp_outlier_data = data.frame(PE_TE_data_outlier$PE_ID, PE_TE_data_outlier$TE_abundance, PE_TE_data_outlier$PE_abundance, PE_TE_data_outlier$cooksd) 248 temp_outlier_data = data.frame(PE_TE_data_outlier$PE_ID, PE_TE_data_outlier$TE_abundance, PE_TE_data_outlier$PE_abundance, PE_TE_data_outlier$cooksd)
199 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance") 249 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance")
200 outdatafile = paste(outdir,"/PE_TE_influential_observation.txt", sep="", collapse=""); 250 outdatafile = paste(outdir,"/PE_TE_influential_observation.txt", sep="", collapse="");
201 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); 251 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F);
202 252
203 253
204 # Save the complete table for download (non influential_observations) 254 # Save the complete table for download (non influential_observations)
205 temp_no_outlier_data = data.frame(PE_TE_data_no_outlier$PE_ID, PE_TE_data_no_outlier$TE_abundance, PE_TE_data_no_outlier$PE_abundance, PE_TE_data_no_outlier$cooksd) 255 temp_no_outlier_data = data.frame(PE_TE_data_no_outlier$PE_ID, PE_TE_data_no_outlier$TE_abundance, PE_TE_data_no_outlier$PE_abundance, PE_TE_data_no_outlier$cooksd)
206 colnames(temp_no_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance") 256 colnames(temp_no_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance")
207 outdatafile = paste(outdir,"/PE_TE_non_influential_observation.txt", sep="", collapse=""); 257 outdatafile = paste(outdir,"/PE_TE_non_influential_observation.txt", sep="", collapse="");
208 write.table(temp_no_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); 258 write.table(temp_no_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F);
209 259
210 260
211 cat("<tr><td>Mean Cook\'s distance</td><td>",mean(cooksd, na.rm=T),"</td></tr>\n", 261 cat("<tr><td>Mean Cook\'s distance</td><td>",mean(cooksd, na.rm=T),"</td></tr>\n",
212 "<tr><td>Total influential observations (Cook\'s distance > ",cookdist_upper_cutoff," * mean Cook\'s distance)</td><td>",length(tempind),"</td>\n", 262 "<tr><td>Total influential observations (Cook\'s distance > ",cookdist_upper_cutoff," * mean Cook\'s distance)</td><td>",length(tempind),"</td>\n",
213 263
214 "<tr><td>Observations with Cook\'s distance < ",cookdist_upper_cutoff," * mean Cook\'s distance</td><td>",length(which(cooksd<as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T))),"</td>\n", 264 "<tr><td>Observations with Cook\'s distance < ",cookdist_upper_cutoff," * mean Cook\'s distance</td><td>",length(which(cooksd<as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T))),"</td>\n",
215 "</table><br><br>\n", 265 "</table><br><br>\n",
216 file = htmloutfile, append = TRUE); 266 file = htmloutfile, append = TRUE);
217 267
218 268
219 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 269 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
220 # Scatter plot after removal of influential points 270 # Scatter plot after removal of influential points
221 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 271 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
222 outplot = paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""); 272 outplot = paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse="");
223 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); 273 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
224 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); 274 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
225 png(outplot, width = 10, height = 10, units = 'in', res=300); 275 png(outplot, width = 10, height = 10, units = 'in', res=300);
226 # bitmap(outplot,"png16m"); 276 # bitmap(outplot,"png16m");
227 g = ggplot(PE_TE_data_no_outlier, aes(x=TE_abundance, y=PE_abundance))+geom_point() + geom_smooth() + xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + xlim(min_lim,max_lim) + ylim(min_lim,max_lim); 277 suppressWarnings(g <- ggplot(PE_TE_data_no_outlier, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() +
228 suppressMessages(plot(g)); 278 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") +
229 dev.off(); 279 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) +
230 280 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f",
231 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatterplot: Before removal</font></th><th><font color=#ffcc33>Scatterplot: After removal</font></th></tr>\n', file = htmloutfile, append = TRUE); 281 PE_ID, TE_abundance, PE_abundance))))
232 # Before 282 suppressMessages(plot(g))
233 cat("<tr><td align=center><!--<font color='#ff0000'><h3>Scatter plot between Proteome and Transcriptome Abundance</h3></font>\n-->", '<img src="TE_PE_scatter.png" width=600 height=600></td>\n', file = htmloutfile, append = TRUE); 283 suppressMessages(saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot))))
234 284 dev.off();
235 # After 285
236 cat("<td align=center>\n", 286
237 '<img src="AbundancePlot_scatter_without_outliers.png" width=600 height=600></td></tr>\n', 287 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatterplot: Before removal</font></th><th><font color=#ffcc33>Scatterplot: After removal</font></th></tr>\n', file = htmloutfile, append = TRUE);
238 file = htmloutfile, append = TRUE); 288 # Before
239 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 289 cat("<tr><td align=center><!--<font color='#ff0000'><h3>Scatter plot between Proteome and Transcriptome Abundance</h3></font>\n-->",
240 290 '<img src="TE_PE_scatter.png" width=600 height=600>',
291 gsub('id="html', 'id="secondhtml"',
292 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/TE_PE_scatter.png",sep="",collapse=""))$widget_div)),
293 '</td>\n',
294 file = htmloutfile, append = TRUE);
295
296 # After
297 cat("<td align=center>\n",
298 '<img src="AbundancePlot_scatter_without_outliers.png" width=600 height=600>',
299 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(outplot)$widget_div),
300
301 '</td></tr>\n',
302 file = htmloutfile, append = TRUE);
303 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
304
241 305
242 cor_result_pearson = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "pearson"); 306 cor_result_pearson = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "pearson");
243 cor_result_spearman = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "spearman"); 307 cor_result_spearman = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "spearman");
244 cor_result_kendall = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "kendall"); 308 cor_result_kendall = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "kendall");
245 309
247 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); 311 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE);
248 cat('</td>\n', file = htmloutfile, append=TRUE); 312 cat('</td>\n', file = htmloutfile, append=TRUE);
249 313
250 314
251 cat('<td><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Method 1</font></th><th><font color=#ffcc33>Method 2</font></th><th><font color=#ffcc33>Method 3</font></th></tr>\n', 315 cat('<td><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Method 1</font></th><th><font color=#ffcc33>Method 2</font></th><th><font color=#ffcc33>Method 3</font></th></tr>\n',
252 file = htmloutfile, append = TRUE); 316 file = htmloutfile, append = TRUE);
253 317
254 cat( 318 cat(
255 "<tr><td>Correlation method</td><td>",cor_result_pearson$method,"</td><td>",cor_result_spearman$method,"</td><td>",cor_result_kendall$method,"</td></tr>\n", 319 "<tr><td>Correlation method</td><td>",cor_result_pearson$method,"</td><td>",cor_result_spearman$method,"</td><td>",cor_result_kendall$method,"</td></tr>\n",
256 "<tr><td>Correlation coefficient</td><td>",cor_result_pearson$estimate,"</td><td>",cor_result_spearman$estimate,"</td><td>",cor_result_kendall$estimate,"</td></tr>\n", 320 "<tr><td>Correlation coefficient</td><td>",cor_result_pearson$estimate,"</td><td>",cor_result_spearman$estimate,"</td><td>",cor_result_kendall$estimate,"</td></tr>\n",
257 file = htmloutfile, append = TRUE) 321 file = htmloutfile, append = TRUE)
265 }else{ 329 }else{
266 tab_n_row = 10; 330 tab_n_row = 10;
267 } 331 }
268 332
269 cat("<br><br><font size=5><b><a href='PE_TE_influential_observation.txt' target='_blank'>Download the complete list of influential observations</a></b></font>&nbsp;&nbsp;&nbsp;&nbsp;", 333 cat("<br><br><font size=5><b><a href='PE_TE_influential_observation.txt' target='_blank'>Download the complete list of influential observations</a></b></font>&nbsp;&nbsp;&nbsp;&nbsp;",
270 "<font size=5><b><a href='PE_TE_non_influential_observation.txt' target='_blank'>Download the complete list (After removing influential points)</a></b></font><br>\n", 334 "<font size=5><b><a href='PE_TE_non_influential_observation.txt' target='_blank'>Download the complete list (After removing influential points)</a></b></font><br>\n",
271 '<br><font color="brown"><h4>Top ',as.character(tab_n_row),' Influential observations (Cook\'s distance > ',cookdist_upper_cutoff,' * mean Cook\'s distance)</h4></font>\n', 335 '<br><font color="brown"><h4>Top ',as.character(tab_n_row),' Influential observations (Cook\'s distance > ',cookdist_upper_cutoff,' * mean Cook\'s distance)</h4></font>\n',
272 file = htmloutfile, append = TRUE); 336 file = htmloutfile, append = TRUE);
273 337
274 cat('<table border=1 cellspacing=0 cellpadding=5> <tr bgcolor="#7a0019">\n', sep = "",file = htmloutfile, append = TRUE); 338 cat('<table border=1 cellspacing=0 cellpadding=5> <tr bgcolor="#7a0019">\n', sep = "",file = htmloutfile, append = TRUE);
275 cat("<th><font color=#ffcc33>Gene</font></th><th><font color=#ffcc33>Protein Log Fold-Change</font></th><th><font color=#ffcc33>Transcript Log Fold-Change</font></th><th><font color=#ffcc33>Cook's Distance</font></th></tr>\n", 339 cat("<th><font color=#ffcc33>Gene</font></th><th><font color=#ffcc33>Protein Log Fold-Change</font></th><th><font color=#ffcc33>Transcript Log Fold-Change</font></th><th><font color=#ffcc33>Cook's Distance</font></th></tr>\n",
276 file = htmloutfile, append = TRUE); 340 file = htmloutfile, append = TRUE);
277 341
278 342
279 for(i in 1:tab_n_row) 343 for(i in 1:tab_n_row)
280 { 344 {
281 cat( 345 cat(
283 '<td>',format(PE_TE_data_outlier_sorted[i,2], scientific=F),'</td>\n', 347 '<td>',format(PE_TE_data_outlier_sorted[i,2], scientific=F),'</td>\n',
284 '<td>',PE_TE_data_outlier_sorted[i,4],'</td>\n', 348 '<td>',PE_TE_data_outlier_sorted[i,4],'</td>\n',
285 '<td>',format(PE_TE_data_outlier_sorted[i,5], scientific=F),'</td></tr>\n', 349 '<td>',format(PE_TE_data_outlier_sorted[i,5], scientific=F),'</td></tr>\n',
286 file = htmloutfile, append = TRUE); 350 file = htmloutfile, append = TRUE);
287 } 351 }
288 cat('</table><br><br>\n',file = htmloutfile, append = TRUE); 352 cat('</table><br><br>\n',file = htmloutfile, append = TRUE);
289 353
290 354
291 } 355 }
292 356
293 357
294 358
295 #=============================================================================== 359 #===============================================================================
296 # Heatmap 360 # Heatmap
297 #=============================================================================== 361 #===============================================================================
298 singlesample_heatmap=function(PE_TE_data, htmloutfile, hm_nclust){ 362 singlesample_heatmap=function(PE_TE_data, htmloutfile, hm_nclust){
299 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Heatmap of PE and TE abundance values (Hierarchical clustering)</font></th><th><font color=#ffcc33>Number of clusters to extract: ',hm_nclust,'</font></th></tr>\n', 363 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Heatmap of PE and TE abundance values (Hierarchical clustering)</font></th><th><font color=#ffcc33>Number of clusters to extract: ',hm_nclust,'</font></th></tr>\n',
300 file = htmloutfile, append = TRUE); 364 file = htmloutfile, append = TRUE);
301 365
302 hc=hclust(dist(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]))) 366 hc=hclust(dist(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")])))
303 hm_cluster = cutree(hc,k=hm_nclust); 367 hm_cluster = cutree(hc,k=hm_nclust);
304 368
305 outplot = paste(outdir,"/PE_TE_heatmap.png",sep="",collapse=""); 369 outplot = paste(outdir,"/PE_TE_heatmap.png",sep="",collapse="");
306 png(outplot, width = 10, height = 10, units = 'in', res=300); 370 png(outplot, width = 10, height = 10, units = 'in', res=300);
307 # bitmap(outplot, "png16m"); 371 # bitmap(outplot, "png16m");
308 par(mfrow=c(1,1)); 372 par(mfrow=c(1,1));
309 hmap = heatmap.2(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]), trace="none", cexCol=1, col=greenred(100),Colv=F, labCol=c("Proteins","Transcripts"), scale="col", hclustfun = hclust, distfun = dist); 373 hmap = heatmap.2(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]),
374 trace="none", cexCol=1, col=greenred(100),Colv=F,
375 labCol=c("Proteins","Transcripts"), scale="col",
376 hclustfun = hclust, distfun = dist);
377
310 dev.off(); 378 dev.off();
311 379
312 cat('<tr><td align=center colspan="2"><img src="PE_TE_heatmap.png" width=800 height=800></td></tr>\n', 380
313 file = htmloutfile, append = TRUE); 381 p <- d3heatmap(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]), scale = "col",
382 dendrogram = "row", colors = greenred(100),
383 hclustfun = hclust, distfun = dist,
384 show_grid = FALSE)
385 saveWidget(p, file.path(gsub("\\.png", "\\.html", outplot)))
386
387 cat('<tr><td align=center colspan="2">',
388 '<img src="PE_TE_heatmap.png" width=800 height=800>',
389 gsub("width:960px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div),
390 '</td></tr>\n',
391 file = htmloutfile, append = TRUE);
314 392
315 393
316 temp_PE_TE_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, hm_cluster); 394 temp_PE_TE_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, hm_cluster);
317 colnames(temp_PE_TE_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cluster (Hierarchical clustering)") 395 colnames(temp_PE_TE_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cluster (Hierarchical clustering)")
318 tempoutfile = paste(outdir,"/PE_TE_hc_clusterpoints.txt",sep="",collapse=""); 396 tempoutfile = paste(outdir,"/PE_TE_hc_clusterpoints.txt",sep="",collapse="");
319 write.table(temp_PE_TE_data, file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n") 397 write.table(temp_PE_TE_data, file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n")
320 398
321 399
322 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_hc_clusterpoints.txt" target="_blank"><b>Download the hierarchical cluster list</b></a></font></td></tr></table>\n', 400 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_hc_clusterpoints.txt" target="_blank"><b>Download the hierarchical cluster list</b></a></font></td></tr></table>\n',
323 file = htmloutfile, append = TRUE); 401 file = htmloutfile, append = TRUE);
324 } 402 }
325 403
326 404
327 #=============================================================================== 405 #===============================================================================
328 # K-means clustering 406 # K-means clustering
335 # bitmap(outplot, "png16m"); 413 # bitmap(outplot, "png16m");
336 par(mfrow=c(1,1)); 414 par(mfrow=c(1,1));
337 scatter.smooth(PE_TE_data_kdata[,"TE_abundance"], PE_TE_data_kdata[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5); 415 scatter.smooth(PE_TE_data_kdata[,"TE_abundance"], PE_TE_data_kdata[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5);
338 legend(1, 95, legend=c("Cluster 1", "Line 2"), col="red", lty=1:1, cex=0.8) 416 legend(1, 95, legend=c("Cluster 1", "Line 2"), col="red", lty=1:1, cex=0.8)
339 legend(1, 95, legend="Cluster 2", col="green", lty=1:1, cex=0.8) 417 legend(1, 95, legend="Cluster 2", col="green", lty=1:1, cex=0.8)
340
341 418
342 ind=which(k1$cluster==1); 419 ind=which(k1$cluster==1);
343 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="red", pch=16); 420 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="red", pch=16);
344 ind=which(k1$cluster==2); 421 ind=which(k1$cluster==2);
345 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="green", pch=16); 422 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="green", pch=16);
359 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="yellow", pch=16); 436 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="yellow", pch=16);
360 ind=which(k1$cluster==10); 437 ind=which(k1$cluster==10);
361 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="orange", pch=16); 438 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="orange", pch=16);
362 dev.off(); 439 dev.off();
363 440
441 # Interactive plot for k-means clustering
442 g <- ggplot(PE_TE_data, aes(x = TE_abundance, y = PE_abundance, label = row.names(PE_TE_data),
443 text=sprintf("Gene: %s<br>Transcript Abundance: %.3f<br>Protein Abundance: %.3f",
444 PE_ID, TE_abundance, PE_abundance),
445 color=as.factor(k1$cluster))) +
446 xlab("Transcript Abundance") + ylab("Protein Abundance") +
447 scale_shape_discrete(solid=F) + geom_smooth(method = "loess", span = 2/3) +
448 geom_point(size = 1, shape = 8) +
449 theme_light() + theme(legend.position="none")
450 saveWidget(ggplotly(g, tooltip=c("text")), file.path(gsub("\\.png", "\\.html", outplot)))
451
364 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>K-mean clustering</font></th><th><font color=#ffcc33>Number of clusters: ',nclust,'</font></th></tr>\n', 452 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>K-mean clustering</font></th><th><font color=#ffcc33>Number of clusters: ',nclust,'</font></th></tr>\n',
365 file = htmloutfile, append = TRUE); 453 file = htmloutfile, append = TRUE);
366 454
367 tempind = order(k1$cluster); 455 tempind = order(k1$cluster);
368 tempoutfile = paste(outdir,"/PE_TE_kmeans_clusterpoints.txt",sep="",collapse=""); 456 tempoutfile = paste(outdir,"/PE_TE_kmeans_clusterpoints.txt",sep="",collapse="");
369 write.table(data.frame(PE_TE_data_kdata[tempind, ], Cluster=k1$cluster[tempind]), file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n") 457 write.table(data.frame(PE_TE_data_kdata[tempind, ], Cluster=k1$cluster[tempind]), file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n")
370 458
371 459 #paste(outdir,"/PE_TE_heatmap.png",sep="",collapse="");
372 cat('<tr><td colspan="2" align=center><img src="PE_TE_kmeans.png" width=800 height=800></td></tr>\n', 460 cat('<tr><td colspan="2" align=center><img src="PE_TE_kmeans.png" width=800 height=800>',
373 file = htmloutfile, append = TRUE); 461 gsub("width:500px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div), '</td></tr>\n',
462 file = htmloutfile, append = TRUE);
374 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_kmeans_clusterpoints.txt" target="_blank"><b>Download the cluster list</b></a></font></td></tr></table><br><hr/>\n', 463 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_kmeans_clusterpoints.txt" target="_blank"><b>Download the cluster list</b></a></font></td></tr></table><br><hr/>\n',
375 file = htmloutfile, append = TRUE); 464 file = htmloutfile, append = TRUE);
376 465
377 } 466 }
378 467
379 #=============================================================================== 468 #===============================================================================
380 # scatter plot 469 # scatter plot
383 { 472 {
384 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); 473 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
385 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); 474 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
386 png(outfile, width = 10, height = 10, units = 'in', res=300); 475 png(outfile, width = 10, height = 10, units = 'in', res=300);
387 # bitmap(outfile, "png16m"); 476 # bitmap(outfile, "png16m");
388 g = ggplot(PE_TE_data, aes(x=TE_abundance, y=PE_abundance))+geom_point() + geom_smooth() + xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + xlim(min_lim,max_lim) + ylim(min_lim,max_lim); 477 suppressWarnings(g <- ggplot(PE_TE_data, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() +
389 suppressMessages(plot(g)); 478 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") +
390 # plot(g); 479 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) +
480 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f",
481 PE_ID, TE_abundance, PE_abundance)),
482 size = .5))
483 suppressMessages(plot(g))
484 suppressMessages(saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outfile))))
391 dev.off(); 485 dev.off();
392 } 486 }
393 487
394 #=============================================================================== 488 #===============================================================================
395 # Correlation table 489 # Correlation table
420 tempdf = df[,-1, drop=FALSE]; 514 tempdf = df[,-1, drop=FALSE];
421 tempdf = t(tempdf) %>% as.data.frame(); 515 tempdf = t(tempdf) %>% as.data.frame();
422 tempdf[is.na(tempdf)] = 0; 516 tempdf[is.na(tempdf)] = 0;
423 tempdf$Sample = rownames(tempdf); 517 tempdf$Sample = rownames(tempdf);
424 tempdf1 = melt(tempdf, id.vars = "Sample"); 518 tempdf1 = melt(tempdf, id.vars = "Sample");
519
520 if("Gene" %in% colnames(df)){
521 tempdf1$Name = df$Gene;
522 } else if ("Protein" %in% colnames(df)){
523 tempdf1$Name = df$Protein;
524 } else if ("Genes" %in% colnames(df)){
525 tempdf1$Name = df$Genes;
526 }
527
425 tempdf1$Group = sampleinfo_df[tempdf1$Sample,2]; 528 tempdf1$Group = sampleinfo_df[tempdf1$Sample,2];
426 png(outplot, width = 6, height = 6, units = 'in', res=300); 529 png(outplot, width = 6, height = 6, units = 'in', res=300);
427 # bitmap(outplot, "png16m"); 530 # bitmap(outplot, "png16m");
428 if(fill_leg=="Yes") 531 if(fill_leg=="No"){
429 { 532 tempdf1$Group = c("case", "control")
430 g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) + geom_boxplot() + labs(x=user_xlab) + labs(y=user_ylab) 533 }
534
535 g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) +
536 geom_boxplot()+
537 labs(x=user_xlab) + labs(y=user_ylab)
538 saveWidget(ggplotly(g), file.path(gsub("\\.png", "\\.html", outfile)))
539 plot(g);
540 dev.off();
541 }
542
543 ## A wrapper to saveWidget which compensates for arguable BUG in
544 ## saveWidget which requires `file` to be in current working
545 ## directory.
546 saveWidget <- function (widget,file,...) {
547 wd<-getwd()
548 on.exit(setwd(wd))
549 outDir<-dirname(file)
550 file<-basename(file)
551 setwd(outDir);
552 htmlwidgets::saveWidget(widget,file=file,selfcontained = FALSE)
553 }
554
555 #===============================================================================
556 # Mean or Median of Replicates
557 #===============================================================================
558
559 mergeReplicates = function(TE_df,PE_df, sampleinfo_df, method)
560 {
561 grps = unique(sampleinfo_df[,2]);
562
563 TE_df_merged <<- sapply(grps, function(x){
564 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1]
565 if(length(tempsample)!=1){
566 apply(TE_df[,tempsample],1,method);
567 }else{
568 return(TE_df[,tempsample]);
569 }
570 });
571 TE_df_merged <<- data.frame(as.character(TE_df[,1]), TE_df_merged);
572 colnames(TE_df_merged) = c(colnames(TE_df)[1], grps);
573
574 PE_df_merged <<- sapply(grps, function(x){
575 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1]
576 if(length(tempsample)!=1){
577 apply(PE_df[,tempsample],1,method);
578 }else{
579 return(PE_df[,tempsample]);
580 }
581 });
582
583 PE_df_merged <<- data.frame(as.character(PE_df[,1]), PE_df_merged);
584 colnames(PE_df_merged) = c(colnames(PE_df)[1], grps);
585
586 #sampleinfo_df_merged = data.frame(Sample = grps, Group = grps, stringsAsFactors = F);
587 sampleinfo_df_merged = data.frame(Sample = grps, Group = "Group", stringsAsFactors = F);
588
589 return(list(TE_df_merged = TE_df_merged, PE_df_merged = PE_df_merged, sampleinfo_df_merged = sampleinfo_df_merged));
590 }
591
592 #===============================================================================
593 # (T-Test or Wilcoxon ranksum test) and Volcano Plot
594 #===============================================================================
595
596 perform_Test_Volcano = function(TE_df_data,PE_df_data,TE_df_logfold, PE_df_logfold,sampleinfo_df, method, correction_method,volc_with)
597 {
598
599 PE_colnames = colnames(PE_df_data);
600 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1];
601 control_ind <<- sapply(control_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)});
602 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1];
603 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)});
604
605 if(method=="mean"){
606 #PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value);
607 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
431 }else{ 608 }else{
432 if(fill_leg=="No") 609 if(method=="median"){
433 { 610 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
434 tempdf1$Group = c("case", "control") 611 # PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value);
435 g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) + geom_boxplot() + labs(x=user_xlab) + labs(y=user_ylab)
436 } 612 }
437 } 613 }
438 plot(g); 614 PE_adj_pval = p.adjust(PE_pval, method = correction_method, n = length(PE_pval))
439 dev.off(); 615
440 } 616
441 617 TE_colnames = colnames(TE_df_data);
442 618 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1];
443 #=============================================================================== 619 control_ind <<- sapply(control_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)});
444 # Mean or Median of Replicates 620 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1];
445 #=============================================================================== 621 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)});
446 622
447 mergeReplicates = function(TE_df,PE_df, sampleinfo_df, method) 623 if(method=="mean"){
448 { 624 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value);
449 grps = unique(sampleinfo_df[,2]); 625 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
450
451 TE_df_merged <<- sapply(grps, function(x){
452 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1]
453 if(length(tempsample)!=1){
454 apply(TE_df[,tempsample],1,method);
455 }else{ 626 }else{
456 return(TE_df[,tempsample]); 627 if(method=="median"){
628 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
629 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value);
630 }
457 } 631 }
458 }); 632 TE_adj_pval = p.adjust(TE_pval, method = correction_method, n = length(TE_pval))
459 TE_df_merged <<- data.frame(as.character(TE_df[,1]), TE_df_merged); 633
460 colnames(TE_df_merged) = c(colnames(TE_df)[1], grps); 634
461 635 PE_TE_logfold_pval = data.frame(TE_df_logfold$Gene, TE_df_logfold$LogFold, TE_pval, TE_adj_pval, PE_df_logfold$LogFold, PE_pval, PE_adj_pval);
462 PE_df_merged <<- sapply(grps, function(x){ 636 colnames(PE_TE_logfold_pval) = c("Gene", "Transcript log fold-change", "p-value (transcript)", "adj p-value (transcript)", "Protein log fold-change", "p-value (protein)", "adj p-value (protein)");
463 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] 637 outdatafile = paste(outdir,"/PE_TE_logfold_pval.txt", sep="", collapse="");
464 if(length(tempsample)!=1){ 638 write.table(PE_TE_logfold_pval, file=outdatafile, row.names=F, sep="\t", quote=F);
465 apply(PE_df[,tempsample],1,method); 639 cat("<br><br><font size=5><b><a href='PE_TE_logfold_pval.txt' target='_blank'>Download the complete fold change data here</a></b></font><br>\n",
640 file = htmloutfile, append = TRUE);
641
642 if(length(condition_ind)!=1)
643 {
644 # Volcano Plot
645
646 if(volc_with=="adj_pval")
647 {
648 PE_pval = PE_adj_pval
649 TE_pval = TE_adj_pval
650 volc_ylab = "-log10 Adjusted p-value";
651 }else{
652 if(volc_with=="pval")
653 {
654 volc_ylab = "-log10 p-value";
655 }
656 }
657 outplot_PE = paste(outdir,"/PE_volcano.png",sep="",collapse="");
658 png(outplot_PE, width = 10, height = 10, units = 'in', res=300);
659 # bitmap(outplot, "png16m");
660 par(mfrow=c(1,1));
661
662 plot(PE_df_logfold$LogFold, -log10(PE_pval),
663 xlab="log2 fold change", ylab=volc_ylab,
664 type="n")
665 sel <- which((PE_df_logfold$LogFold<=log(2,base=2))&(PE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use
666 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="black")
667 PE_df_logfold$color <- "black"
668 #sel <- which((PE_df_logfold$LogFold>log(2,base=2))&(PE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use
669 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2)))
670 sel1 <- which(PE_pval<=0.05)
671 sel=intersect(sel,sel1)
672 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="red")
673 PE_df_logfold[sel,]$color <- "red"
674 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2)))
675 sel1 <- which(PE_pval>0.05)
676 sel=intersect(sel,sel1)
677 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="blue")
678 PE_df_logfold[sel,]$color <- "blue"
679 abline(h = -log(0.05,base=10), col="red", lty=2)
680 abline(v = log(2,base=2), col="red", lty=2)
681 abline(v = log(0.5,base=2), col="red", lty=2)
682 dev.off();
683
684 g <- ggplot(PE_df_logfold, aes(x = LogFold, -log10(PE_pval), color = as.factor(color),
685 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f",
686 Genes, LogFold, -log10(PE_pval), PE_pval))) +
687 xlab("log2 fold change") + ylab("-log10 p-value") +
688 geom_point(shape=1, size = 1.5, stroke = .2) +
689 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) +
690 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") +
691 geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") +
692 geom_vline(xintercept = log(0.5,base=2), linetype="dashed", color="red") +
693 theme_light() + theme(legend.position="none")
694 saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot_PE)))
695
696 outplot_TE = paste(outdir,"/TE_volcano.png",sep="",collapse="");
697 png(outplot_TE, width = 10, height = 10, units = 'in', res=300);
698 # bitmap(outplot, "png16m");
699 par(mfrow=c(1,1));
700
701 plot(TE_df_logfold$LogFold, -log10(TE_pval),
702 xlab="log2 fold change", ylab=volc_ylab,
703 type="n")
704
705 sel <- which((TE_df_logfold$LogFold<=log(2,base=2))&(TE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use
706 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="black")
707 TE_df_logfold$color <- "black"
708 #sel <- which((TE_df_logfold$LogFold>log(2,base=2))&(TE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use
709 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2)))
710 sel1 <- which(TE_pval<=0.05)
711 sel=intersect(sel,sel1)
712 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="red")
713 TE_df_logfold[sel,]$color <- "red"
714 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2)))
715 sel1 <- which(TE_pval>0.05)
716 sel=intersect(sel,sel1)
717 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="blue")
718 TE_df_logfold[sel,]$color <- "blue"
719 abline(h = -log(0.05,base=10), col="red", lty=2)
720 abline(v = log(2,base=2), col="red", lty=2)
721 abline(v = log(0.5,base=2), col="red", lty=2)
722 dev.off();
723
724 g <- ggplot(TE_df_logfold, aes(x = LogFold, -log10(TE_pval), color = as.factor(color),
725 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f",
726 Genes, LogFold, -log10(TE_pval), TE_pval))) +
727 xlab("log2 fold change") + ylab("-log10 p-value") +
728 geom_point(shape=1, size = 1.5, stroke = .2) +
729 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) +
730 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") +
731 geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") +
732 geom_vline(xintercept = log(0.5,base=2), linetype="dashed", color="red") +
733 theme_light() + theme(legend.position="none")
734 saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot_TE)))
735
736
737 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Transcript Fold-Change</font></th><th><font color=#ffcc33>Protein Fold-Change</font></th></tr>\n', file = htmloutfile, append = TRUE);
738 cat("<tr><td align=center>",
739 '<img src="TE_volcano.png" width=600 height=600>',
740 extractWidgetCode(outplot_TE)$widget_div,
741 '</td>\n', file = htmloutfile, append = TRUE);
742 cat("<td align=center>",
743 '<img src="PE_volcano.png" width=600 height=600>',
744 extractWidgetCode(outplot_PE)$widget_div,
745 '</td></tr></table><br>\n',
746 file = htmloutfile, append = TRUE);
747
748
466 }else{ 749 }else{
467 return(PE_df[,tempsample]); 750 cat('<br><br><b><font color=red>!!! No replicates found. Cannot perform test to check significance of differential expression. Thus, no Volcano plot generated !!!</font></b><br><br>',
751 file = htmloutfile, append = TRUE);
468 } 752 }
469 }); 753
470
471 PE_df_merged <<- data.frame(as.character(PE_df[,1]), PE_df_merged);
472 colnames(PE_df_merged) = c(colnames(PE_df)[1], grps);
473
474 #sampleinfo_df_merged = data.frame(Sample = grps, Group = grps, stringsAsFactors = F);
475 sampleinfo_df_merged = data.frame(Sample = grps, Group = "Group", stringsAsFactors = F);
476
477 return(list(TE_df_merged = TE_df_merged, PE_df_merged = PE_df_merged, sampleinfo_df_merged = sampleinfo_df_merged));
478 }
479
480 #===============================================================================
481 # (T-Test or Wilcoxon ranksum test) and Volcano Plot
482 #===============================================================================
483
484 perform_Test_Volcano = function(TE_df_data,PE_df_data,TE_df_logfold, PE_df_logfold,sampleinfo_df, method, correction_method,volc_with)
485 {
486
487 PE_colnames = colnames(PE_df_data);
488 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1];
489 control_ind <<- sapply(control_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)});
490 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1];
491 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)});
492
493 if(method=="mean"){
494 #PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value);
495 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
496 }else{
497 if(method=="median"){
498 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
499 # PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value);
500 }
501 }
502 PE_adj_pval = p.adjust(PE_pval, method = correction_method, n = length(PE_pval))
503
504
505 TE_colnames = colnames(TE_df_data);
506 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1];
507 control_ind <<- sapply(control_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)});
508 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1];
509 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)});
510
511 if(method=="mean"){
512 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value);
513 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
514 }else{
515 if(method=="median"){
516 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
517 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value);
518 }
519 }
520 TE_adj_pval = p.adjust(TE_pval, method = correction_method, n = length(TE_pval))
521
522
523 PE_TE_logfold_pval = data.frame(TE_df_logfold$Gene, TE_df_logfold$LogFold, TE_pval, TE_adj_pval, PE_df_logfold$LogFold, PE_pval, PE_adj_pval);
524 colnames(PE_TE_logfold_pval) = c("Gene", "Transcript log fold-change", "p-value (transcript)", "adj p-value (transcript)", "Protein log fold-change", "p-value (protein)", "adj p-value (protein)");
525 outdatafile = paste(outdir,"/PE_TE_logfold_pval.txt", sep="", collapse="");
526 write.table(PE_TE_logfold_pval, file=outdatafile, row.names=F, sep="\t", quote=F);
527 cat("<br><br><font size=5><b><a href='PE_TE_logfold_pval.txt' target='_blank'>Download the complete fold change data here</a></b></font><br>\n",
528 file = htmloutfile, append = TRUE);
529
530 if(length(condition_ind)!=1)
531 {
532 # Volcano Plot
533
534 if(volc_with=="adj_pval")
535 {
536 PE_pval = PE_adj_pval
537 TE_pval = TE_adj_pval
538 volc_ylab = "-log10 Adjusted p-value";
539 }else{
540 if(volc_with=="pval")
541 {
542 volc_ylab = "-log10 p-value";
543 }
544 }
545 outplot = paste(outdir,"/PE_volcano.png",sep="",collapse="");
546 png(outplot, width = 10, height = 10, units = 'in', res=300);
547 # bitmap(outplot, "png16m");
548 par(mfrow=c(1,1));
549
550 plot(PE_df_logfold$LogFold, -log10(PE_pval),
551 xlab="log2 fold change", ylab=volc_ylab,
552 type="n")
553 sel <- which((PE_df_logfold$LogFold<=log(2,base=2))&(PE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use
554 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="black")
555 #sel <- which((PE_df_logfold$LogFold>log(2,base=2))&(PE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use
556 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2)))
557 sel1 <- which(PE_pval<=0.05)
558 sel=intersect(sel,sel1)
559 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="red")
560 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2)))
561 sel1 <- which(PE_pval>0.05)
562 sel=intersect(sel,sel1)
563 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="blue")
564 abline(h = -log(0.05,base=10), col="red", lty=2)
565 abline(v = log(2,base=2), col="red", lty=2)
566 abline(v = log(0.5,base=2), col="red", lty=2)
567 dev.off();
568
569
570
571 outplot = paste(outdir,"/TE_volcano.png",sep="",collapse="");
572 png(outplot, width = 10, height = 10, units = 'in', res=300);
573 # bitmap(outplot, "png16m");
574 par(mfrow=c(1,1));
575
576 plot(TE_df_logfold$LogFold, -log10(TE_pval),
577 xlab="log2 fold change", ylab=volc_ylab,
578 type="n")
579 sel <- which((TE_df_logfold$LogFold<=log(2,base=2))&(TE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use
580 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="black")
581 #sel <- which((TE_df_logfold$LogFold>log(2,base=2))&(TE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use
582 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2)))
583 sel1 <- which(TE_pval<=0.05)
584 sel=intersect(sel,sel1)
585 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="red")
586 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2)))
587 sel1 <- which(TE_pval>0.05)
588 sel=intersect(sel,sel1)
589 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="blue")
590 abline(h = -log(0.05,base=10), col="red", lty=2)
591 abline(v = log(2,base=2), col="red", lty=2)
592 abline(v = log(0.5,base=2), col="red", lty=2)
593 dev.off();
594
595
596
597 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Transcript Fold-Change</font></th><th><font color=#ffcc33>Protein Fold-Change</font></th></tr>\n', file = htmloutfile, append = TRUE);
598 cat("<tr><td align=center>", '<img src="TE_volcano.png" width=600 height=600></td>\n', file = htmloutfile, append = TRUE);
599 cat("<td align=center>",
600 '<img src="PE_volcano.png" width=600 height=600></td></tr></table><br>\n',
601 file = htmloutfile, append = TRUE);
602 }else{
603 cat('<br><br><b><font color=red>!!! No replicates found. Cannot perform test to check significance of differential expression. Thus, no Volcano plot generated !!!</font></b><br><br>',
604 file = htmloutfile, append = TRUE);
605 }
606
607 } 754 }
608 755
609 756
610 #*************************************************************************************************************************************** 757 #***************************************************************************************************************************************
611 # Functions: End 758 # Functions: End
612 #*************************************************************************************************************************************** 759 #***************************************************************************************************************************************
613
614
615 760
616 761
617 #=============================================================================== 762 #===============================================================================
618 # Arguments 763 # Arguments
619 #=============================================================================== 764 #===============================================================================
636 volc_with = args[10]; 781 volc_with = args[10];
637 782
638 htmloutfile = args[11]; # html output file 783 htmloutfile = args[11]; # html output file
639 outdir = args[12]; # html supporting files 784 outdir = args[12]; # html supporting files
640 785
641
642 #=============================================================================== 786 #===============================================================================
643 # Check for file existance 787 # Check for file existance
644 #=============================================================================== 788 #===============================================================================
645 if(! file.exists(proteome_file)) 789 if(! file.exists(proteome_file))
646 { 790 {
659 suppressPackageStartupMessages(library(dplyr)); 803 suppressPackageStartupMessages(library(dplyr));
660 suppressPackageStartupMessages(library(data.table)); 804 suppressPackageStartupMessages(library(data.table));
661 suppressPackageStartupMessages(library(gplots)); 805 suppressPackageStartupMessages(library(gplots));
662 suppressPackageStartupMessages(library(ggplot2)); 806 suppressPackageStartupMessages(library(ggplot2));
663 suppressPackageStartupMessages(library(ggfortify)); 807 suppressPackageStartupMessages(library(ggfortify));
808 suppressPackageStartupMessages(library(plotly));
809 suppressPackageStartupMessages(library(d3heatmap));
664 810
665 #=============================================================================== 811 #===============================================================================
666 # Select mode and parse experiment design file 812 # Select mode and parse experiment design file
667 #=============================================================================== 813 #===============================================================================
668 if(mode=="multiple") 814 if(mode=="multiple")
669 { 815 {
670 expDesign = fread(sampleinfo_file, header = FALSE, stringsAsFactors = FALSE, sep="\t") %>% data.frame(); 816 expDesign = fread(sampleinfo_file, header = FALSE, stringsAsFactors = FALSE, sep="\t") %>% data.frame();
671 expDesign_cc = expDesign[1:2,]; 817 expDesign_cc = expDesign[1:2,];
672 818
673 sampleinfo_df = expDesign[3:nrow(expDesign),]; 819 sampleinfo_df = expDesign[3:nrow(expDesign),];
674 rownames(sampleinfo_df)=1:nrow(sampleinfo_df); 820 rownames(sampleinfo_df)=1:nrow(sampleinfo_df);
675 colnames(sampleinfo_df) = c("Sample","Group"); 821 colnames(sampleinfo_df) = c("Sample","Group");
676 822
677 condition_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),1]; 823 condition_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),1];
678 condition_g_name = "case"; 824 condition_g_name = "case";
679 control_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),1]; 825 control_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),1];
680 control_g_name = "control"; 826 control_g_name = "control";
681 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),2] = "case"; 827 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),2] = "case";
682 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),2] = "control"; 828 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),2] = "control";
683 sampleinfo_df_orig = sampleinfo_df; 829 sampleinfo_df_orig = sampleinfo_df;
684 } 830 }
685 831
686 if(mode=="logfold") 832 if(mode=="logfold")
687 { 833 {
688 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change")) 834 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change"))
689 } 835 }
690 836
691 #=============================================================================== 837 #===============================================================================
692 # Parse Transcriptome data 838 # Parse Transcriptome data
693 #=============================================================================== 839 #===============================================================================
694 TE_df_orig = fread(transcriptome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame(); 840 TE_df_orig = fread(transcriptome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame();
695 if(mode=="multiple") 841 if(mode=="multiple")
696 { 842 {
697 TE_df = TE_df_orig[,c(colnames(TE_df_orig)[1],condition_cols,control_cols)]; 843 TE_df = TE_df_orig[,c(colnames(TE_df_orig)[1],condition_cols,control_cols)];
698 } 844 }
699 if(mode=="logfold") 845 if(mode=="logfold")
700 { 846 {
701 TE_df = TE_df_orig; 847 TE_df = TE_df_orig;
702 colnames(TE_df) = c("Genes", "LogFold"); 848 colnames(TE_df) = c("Genes", "LogFold");
703 } 849 }
704 #=============================================================================== 850 #===============================================================================
705 # Parse Proteome data 851 # Parse Proteome data
706 #=============================================================================== 852 #===============================================================================
707 PE_df_orig = fread(proteome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame(); 853 PE_df_orig = fread(proteome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame();
708 if(mode=="multiple") 854 if(mode=="multiple")
709 { 855 {
710 PE_df = PE_df_orig[,c(colnames(PE_df_orig)[1],condition_cols,control_cols)]; 856 PE_df = PE_df_orig[,c(colnames(PE_df_orig)[1],condition_cols,control_cols)];
711 } 857 }
712 if(mode=="logfold") 858 if(mode=="logfold")
713 { 859 {
714 PE_df = PE_df_orig; 860 PE_df = PE_df_orig;
715 colnames(PE_df) = c("Genes", "LogFold"); 861 colnames(PE_df) = c("Genes", "LogFold");
716 } 862 }
717 863
718 #============================================================================================================= 864 #=============================================================================================================
719 # Create directory structures and then set the working directory to output directory 865 # Create directory structures and then set the working directory to output directory
720 #============================================================================================================= 866 #=============================================================================================================
723 dir.create(outdir); 869 dir.create(outdir);
724 } 870 }
725 #=============================================================================== 871 #===============================================================================
726 # Write initial data summary in html outfile 872 # Write initial data summary in html outfile
727 #=============================================================================== 873 #===============================================================================
728 cat("<html><head></head><body>\n", file = htmloutfile); 874 cat("<html><head></head><body>\n", file = htmloutfile);
729 875
730 cat("<h1><u>QuanTP: Association between abundance ratios of transcript and protein</u></h1><hr/>\n", 876 cat("<h1><u>QuanTP: Association between abundance ratios of transcript and protein</u></h1><hr/>\n",
731 "<font><h3>Input data summary</h3></font>\n", 877 "<font><h3>Input data summary</h3></font>\n",
732 "<ul>\n", 878 "<ul>\n",
733 "<li>Abbreviations used: PE (Proteome data) and TE (Transcriptome data)","</li><br>\n", 879 "<li>Abbreviations used: PE (Proteome data) and TE (Transcriptome data)","</li><br>\n",
734 "<li>Input Proteome data dimension (Row Column): ", dim(PE_df)[1]," x ", dim(PE_df)[2],"</li>\n", 880 "<li>Input Proteome data dimension (Row Column): ", dim(PE_df)[1]," x ", dim(PE_df)[2],"</li>\n",
735 "<li>Input Transcriptome data dimension (Row Column): ", dim(TE_df)[1]," x ", dim(TE_df)[2],"</li></ul><hr/>\n", 881 "<li>Input Transcriptome data dimension (Row Column): ", dim(TE_df)[1]," x ", dim(TE_df)[2],"</li></ul><hr/>\n",
736 file = htmloutfile, append = TRUE); 882 file = htmloutfile, append = TRUE);
737 883
738 cat("<h3 id=table_of_content>Table of Contents:</h3>\n", 884 cat("<h3 id=table_of_content>Table of Contents:</h3>\n",
739 "<ul>\n", 885 "<ul>\n",
740 "<li><a href=#sample_dist>Sample distribution</a></li>\n", 886 "<li><a href=#sample_dist>Sample distribution</a></li>\n",
741 "<li><a href=#corr_data>Correlation</a></li>\n", 887 "<li><a href=#corr_data>Correlation</a></li>\n",
742 "<li><a href=#regression_data>Regression analysis</a></li>\n", 888 "<li><a href=#regression_data>Regression analysis</a></li>\n",
743 "<li><a href=#inf_obs>Influential observations</a></li>\n", 889 "<li><a href=#inf_obs>Influential observations</a></li>\n",
791 PE_nacount = sum(is.na(PE_df)); 937 PE_nacount = sum(is.na(PE_df));
792 938
793 TE_df[is.na(TE_df)] = 0; 939 TE_df[is.na(TE_df)] = 0;
794 PE_df[is.na(PE_df)] = 0; 940 PE_df[is.na(PE_df)] = 0;
795 941
942 #===============================================================================
943 # Obtain JS/HTML lines for interactive visualization
944 #===============================================================================
945 extractWidgetCode = function(outplot){
946 lines <- readLines(gsub("\\.png", "\\.html", outplot))
947 return(list(
948 'prescripts' = c('',
949 gsub('script', 'script',
950 lines[grep('<head>',lines) + 3
951 :grep('</head>' ,lines) - 5]),
952 ''),
953 'widget_div' = paste('<!--',
954 gsub('width:100%;height:400px',
955 'width:500px;height:500px',
956 lines[grep(lines, pattern='html-widget')]),
957 '-->', sep=''),
958 'postscripts' = paste('',
959 gsub('script', 'script',
960 lines[grep(lines, pattern='<script type')]),
961 '', sep='')));
962 }
963 prescripts <- list()
964 postscripts <- list()
965
796 966
797 #=============================================================================== 967 #===============================================================================
798 # Decide based on analysis mode 968 # Decide based on analysis mode
799 #=============================================================================== 969 #===============================================================================
800 if(mode=="logfold") 970 if(mode=="logfold")
801 { 971 {
802 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n', 972 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n',
803 file = htmloutfile, append = TRUE); 973 file = htmloutfile, append = TRUE);
804 974
805 # TE Boxplot 975 # TE Boxplot
806 outplot = paste(outdir,"/Box_TE.png",sep="",collape=""); 976 outplot = paste(outdir,"/Box_TE.png",sep="",collape="");
807 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n', 977 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n',
808 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', 978 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
809 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE); 979 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE);
810 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance data"); 980 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance data");
811 981
812 # PE Boxplot 982 # PE Boxplot
813 outplot = paste(outdir,"/Box_PE.png",sep="",collape=""); 983 outplot = paste(outdir,"/Box_PE.png",sep="",collape="");
814 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE); 984 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE);
815 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance data"); 985 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance data");
816 986
817 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n', 987 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n',
818 file = htmloutfile, append = TRUE); 988 file = htmloutfile, append = TRUE);
819 989
820 # TE PE scatter 990 # TE PE scatter
821 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape=""); 991 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape="");
822 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE); 992 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE);
823 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800></td></tr>\n', file = htmloutfile, append = TRUE); 993 singlesample_scatter(PE_TE_data, outplot);
994 lines <- extractWidgetCode(outplot);
995 postscripts <- c(postscripts, lines$postscripts);
996 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800>', lines$widget_div, '</td></tr>\n', file = htmloutfile, append = TRUE);
824 PE_TE_data = data.frame(PE_df, TE_df); 997 PE_TE_data = data.frame(PE_df, TE_df);
825 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance"); 998 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance");
826 singlesample_scatter(PE_TE_data, outplot);
827 999
828 # TE PE Cor 1000 # TE PE Cor
829 cat("<tr><td align=center>", file = htmloutfile, append = TRUE); 1001 cat("<tr><td align=center>", file = htmloutfile, append = TRUE);
830 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); 1002 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE);
831 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n', 1003 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n',
832 file = htmloutfile, append = TRUE); 1004 file = htmloutfile, append = TRUE);
833 cat('</td></table>', 1005 cat('</td></table>',
834 file = htmloutfile, append = TRUE); 1006 file = htmloutfile, append = TRUE);
835 1007
836 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n', 1008 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n',
837 file = htmloutfile, append = TRUE); 1009 file = htmloutfile, append = TRUE);
838 1010
839 # TE PE Regression 1011 # TE PE Regression
840 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE); 1012 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE);
1013 postscripts <- c(postscripts, c(extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$postscripts,
1014 extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$postscripts,
1015 extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$postscripts,
1016 extractWidgetCode(paste(outdir,"/PE_TE_lm_cooksd.png",sep="",collapse=""))$postscripts,
1017 extractWidgetCode(paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""))$postscripts));
841 1018
842 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n', 1019 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n',
843 file = htmloutfile, append = TRUE); 1020 file = htmloutfile, append = TRUE);
844 1021
845 # TE PE Heatmap 1022 # TE PE Heatmap
846 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust); 1023 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust);
1024 lines <- extractWidgetCode(paste(outdir,"/PE_TE_heatmap.png",sep="",collapse=""))
1025 postscripts <- c(postscripts, lines$postscripts)
1026 prescripts <- c(prescripts, lines$prescripts)
847 1027
848 1028
849 # TE PE Clustering (kmeans) 1029 # TE PE Clustering (kmeans)
850 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster)) 1030 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster))
1031 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/PE_TE_kmeans.png",sep="",collapse=""))$postscripts)
851 1032
852 }else{ 1033 }else{
853 if(mode=="multiple") 1034 if(mode=="multiple")
854 { 1035 {
855 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n', 1036 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n',
856 file = htmloutfile, append = TRUE); 1037 file = htmloutfile, append = TRUE);
857 1038
858 # TE Boxplot 1039 # TE Boxplot
859 outplot = paste(outdir,"/Box_TE_all_rep.png",sep="",collape=""); 1040 outplot = paste(outdir,"/Box_TE_all_rep.png",sep="",collape="");
860 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n',
861 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
862 "<tr><td align=center>", '<img src="Box_TE_all_rep.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE);
863 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)])); 1041 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)]));
864 colnames(temp_df_te_data) = colnames(TE_df); 1042 colnames(temp_df_te_data) = colnames(TE_df);
865 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance (log)"); 1043 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance (log)");
1044 lines <- extractWidgetCode(outplot)
1045 prescripts <- c(prescripts, lines$prescripts)
1046 postscripts <- c(postscripts, lines$postscripts)
1047 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n',
1048 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
1049 "<tr><td align=center>", file = htmloutfile, append = TRUE);
1050 cat('<img src="Box_TE_all_rep.png" width=500 height=500>',
1051 lines$widget_div, '</td>', file = htmloutfile, append = TRUE);
866 1052
867 # PE Boxplot 1053 # PE Boxplot
868 outplot = paste(outdir,"/Box_PE_all_rep.png",sep="",collape=""); 1054 outplot = paste(outdir,"/Box_PE_all_rep.png",sep="",collape="");
869 cat("<td align=center>", '<img src="Box_PE_all_rep.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE);
870 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)])); 1055 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)]));
871 colnames(temp_df_pe_data) = colnames(PE_df); 1056 colnames(temp_df_pe_data) = colnames(PE_df);
872 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance (log)"); 1057 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance (log)");
1058 lines <- extractWidgetCode(outplot)
1059 #prescripts <- c(prescripts, lines$prescripts)
1060 postscripts <- c(postscripts, lines$postscripts)
1061 cat("<td align=center>", '<img src="Box_PE_all_rep.png" width=500 height=500>',
1062 lines$widget_div, '</td></tr></table>\n', file = htmloutfile, append = TRUE);
873 1063
874 # Calc TE PCA 1064 # Calc TE PCA
875 outplot = paste(outdir,"/PCA_TE_all_rep.png",sep="",collape=""); 1065 outplot = paste(outdir,"/PCA_TE_all_rep.png",sep="",collape="");
876 multisample_PCA(TE_df, sampleinfo_df, outplot); 1066 multisample_PCA(TE_df, sampleinfo_df, outplot);
877 1067 PCA_TE <- extractWidgetCode(outplot)
1068 postscripts <- c(postscripts, PCA_TE$postscripts)
1069
878 # Calc PE PCA 1070 # Calc PE PCA
879 outplot = paste(outdir,"/PCA_PE_all_rep.png",sep="",collape=""); 1071 outplot = paste(outdir,"/PCA_PE_all_rep.png",sep="",collape="");
880 multisample_PCA(PE_df, sampleinfo_df, outplot); 1072 multisample_PCA(PE_df, sampleinfo_df, outplot);
881 1073 PCA_PE <- extractWidgetCode(outplot)
1074 postscripts <- c(postscripts, PCA_PE$postscripts)
882 1075
883 # Replicate mode 1076 # Replicate mode
884 templist = mergeReplicates(TE_df,PE_df, sampleinfo_df, method); 1077 templist = mergeReplicates(TE_df,PE_df, sampleinfo_df, method);
885 TE_df = templist$TE_df_merged; 1078 TE_df = templist$TE_df_merged;
886 PE_df = templist$PE_df_merged; 1079 PE_df = templist$PE_df_merged;
887 sampleinfo_df = templist$sampleinfo_df_merged; 1080 sampleinfo_df = templist$sampleinfo_df_merged;
888 rownames(sampleinfo_df) = sampleinfo_df[,1]; 1081 rownames(sampleinfo_df) = sampleinfo_df[,1];
889 1082
890 # TE Boxplot 1083 # TE Boxplot
891 outplot = paste(outdir,"/Box_TE_rep.png",sep="",collape=""); 1084 outplot = paste(outdir,"/Box_TE_rep.png",sep="",collape="");
892 cat('<br><font color="#ff0000"><h3>Sample wise distribution (Box plot) after using ',method,' on replicates </h3></font><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
893 "<tr><td align=center>", '<img src="Box_TE_rep.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE);
894 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)])); 1085 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)]));
895 colnames(temp_df_te_data) = colnames(TE_df); 1086 colnames(temp_df_te_data) = colnames(TE_df);
896 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Transcript Abundance (log)"); 1087 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Transcript Abundance (log)");
897 1088 lines <- extractWidgetCode(outplot)
1089 #prescripts <- c(prescripts, lines$prescripts)
1090 postscripts <- c(postscripts, lines$postscripts)
1091 cat('<br><font color="#ff0000"><h3>Sample wise distribution (Box plot) after using ',method,' on replicates </h3></font><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
1092 "<tr><td align=center>", '<img src="Box_TE_rep.png" width=500 height=500>', lines$widget_div, '</td>\n', file = htmloutfile, append = TRUE);
1093
898 # PE Boxplot 1094 # PE Boxplot
899 outplot = paste(outdir,"/Box_PE_rep.png",sep="",collape=""); 1095 outplot = paste(outdir,"/Box_PE_rep.png",sep="",collape="");
900 cat("<td align=center>", '<img src="Box_PE_rep.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE);
901 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)])); 1096 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)]));
902 colnames(temp_df_pe_data) = colnames(PE_df); 1097 colnames(temp_df_pe_data) = colnames(PE_df);
903 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Protein Abundance (log)"); 1098 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Protein Abundance (log)");
904 1099 lines <- extractWidgetCode(outplot)
1100 #prescripts <- c(prescripts, lines$prescripts)
1101 postscripts <- c(postscripts, lines$postscripts)
1102 cat("<td align=center>", '<img src="Box_PE_rep.png" width=500 height=500>', lines$widget_div, '</td></tr></table>\n', file = htmloutfile, append = TRUE);
1103
905 #=============================================================================== 1104 #===============================================================================
906 # Calculating log fold change and running the "single" code part 1105 # Calculating log fold change and running the "single" code part
907 #=============================================================================== 1106 #===============================================================================
908 1107
909 TE_df = data.frame("Genes"=TE_df[,1], "LogFold"=apply(TE_df[,c(which(colnames(TE_df)==condition_g_name),which(colnames(TE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2))); 1108 TE_df = data.frame("Genes"=TE_df[,1], "LogFold"=apply(TE_df[,c(which(colnames(TE_df)==condition_g_name),which(colnames(TE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2)));
910 PE_df = data.frame("Genes"=PE_df[,1], "LogFold"=apply(PE_df[,c(which(colnames(PE_df)==condition_g_name),which(colnames(PE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2))); 1109 PE_df = data.frame("Genes"=PE_df[,1], "LogFold"=apply(PE_df[,c(which(colnames(PE_df)==condition_g_name),which(colnames(PE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2)));
911 1110
912 #=============================================================================== 1111 #===============================================================================
913 # Treat missing values 1112 # Treat missing values
914 #=============================================================================== 1113 #===============================================================================
915 1114
916 TE_df[is.infinite(TE_df[,2]),2] = NA; 1115 TE_df[is.infinite(TE_df[,2]),2] = NA;
917 PE_df[is.infinite(PE_df[,2]),2] = NA; 1116 PE_df[is.infinite(PE_df[,2]),2] = NA;
918 TE_df[is.na(TE_df)] = 0; 1117 TE_df[is.na(TE_df)] = 0;
919 PE_df[is.na(PE_df)] = 0; 1118 PE_df[is.na(PE_df)] = 0;
920 1119
921 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change")) 1120 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change"))
922 #=============================================================================== 1121 #===============================================================================
923 # Find common samples 1122 # Find common samples
924 #=============================================================================== 1123 #===============================================================================
925 1124
926 common_samples = intersect(sampleinfo_df[,1], colnames(TE_df)[-1]) %>% intersect(., colnames(PE_df)[-1]); 1125 common_samples = intersect(sampleinfo_df[,1], colnames(TE_df)[-1]) %>% intersect(., colnames(PE_df)[-1]);
927 TE_df = select(TE_df, 1, common_samples); 1126 TE_df = select(TE_df, 1, common_samples);
928 PE_df = select(PE_df, 1, common_samples); 1127 PE_df = select(PE_df, 1, common_samples);
929 sampleinfo_df = filter(sampleinfo_df, Sample %in% common_samples); 1128 sampleinfo_df = filter(sampleinfo_df, Sample %in% common_samples);
930 rownames(sampleinfo_df) = sampleinfo_df[,1]; 1129 rownames(sampleinfo_df) = sampleinfo_df[,1];
931 1130
932 # TE Boxplot 1131 # TE Boxplot
933 outplot = paste(outdir,"/Box_TE.png",sep="",collape=""); 1132 outplot = paste(outdir,"/Box_TE.png",sep="",collape="");
1133 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Transcript Abundance fold-change (log2)");
1134 lines <- extractWidgetCode(outplot)
1135 postscripts <- c(postscripts, lines$postscripts)
934 cat('<br><font color="#ff0000"><h3>Distribution (Box plot) of log fold change </h3></font>', file = htmloutfile, append = TRUE); 1136 cat('<br><font color="#ff0000"><h3>Distribution (Box plot) of log fold change </h3></font>', file = htmloutfile, append = TRUE);
935 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', 1137 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
936 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE); 1138 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500>', lines$widget_div, '</td>\n', file = htmloutfile, append = TRUE);
937 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Transcript Abundance fold-change (log2)");
938 1139
939 # PE Boxplot 1140 # PE Boxplot
940 outplot = paste(outdir,"/Box_PE.png",sep="",collape=""); 1141 outplot = paste(outdir,"/Box_PE.png",sep="",collape="");
941 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE);
942 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Protein Abundance fold-change(log2)"); 1142 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Protein Abundance fold-change(log2)");
1143 lines <- extractWidgetCode(outplot)
1144 postscripts <- c(postscripts, lines$postscripts)
1145 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500>', lines$widget_div,'</td></tr></table>\n', file = htmloutfile, append = TRUE);
943 1146
944 1147
945 # Log Fold Data 1148 # Log Fold Data
946 perform_Test_Volcano(TE_df_orig,PE_df_orig,TE_df, PE_df,sampleinfo_df_orig,method,correction_method,volc_with) 1149 perform_Test_Volcano(TE_df_orig,PE_df_orig,TE_df, PE_df,sampleinfo_df_orig,method,correction_method,volc_with)
947 1150 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/TE_volcano.png",sep="",collapse=""))$postscripts)
948 1151 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/PE_volcano.png",sep="",collapse=""))$postscripts)
949 1152
950 # Print PCA 1153 # Print PCA
951 1154
952 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>PCA plot: Transcriptome data</font></th><th><font color=#ffcc33>PCA plot: Proteome data</font></th></tr>\n', 1155 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>PCA plot: Transcriptome data</font></th><th><font color=#ffcc33>PCA plot: Proteome data</font></th></tr>\n',
953 "<tr><td align=center>", '<img src="PCA_TE_all_rep.png" width=500 height=500></td>\n', 1156 "<tr><td align=center>", '<img src="PCA_TE_all_rep.png" width=500 height=500>', PCA_TE$widget_div, '</td>\n',
954 "<td align=center>", '<img src="PCA_PE_all_rep.png" width=500 height=500></td></tr></table>\n', 1157 "<td align=center>", '<img src="PCA_PE_all_rep.png" width=500 height=500>', PCA_PE$widget_div, '</td></tr></table>\n',
955 file = htmloutfile, append = TRUE); 1158 file = htmloutfile, append = TRUE);
956 1159
957 1160
958 1161
959 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n', 1162 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n',
960 file = htmloutfile, append = TRUE); 1163 file = htmloutfile, append = TRUE);
1164
1165 PE_TE_data = data.frame(PE_df, TE_df);
1166 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance");
961 1167
962 # TE PE scatter 1168 # TE PE scatter
963 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape=""); 1169 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape="");
964 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE); 1170 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE);
965 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800></td>\n', file = htmloutfile, append = TRUE);
966 PE_TE_data = data.frame(PE_df, TE_df);
967 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance");
968 singlesample_scatter(PE_TE_data, outplot); 1171 singlesample_scatter(PE_TE_data, outplot);
969 1172 lines <- extractWidgetCode(outplot);
1173 postscripts <- c(postscripts, lines$postscripts);
1174 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800>', gsub('width:500px;height:500px', 'width:800px;height:800px' , lines$widget_div),
1175 '</td>\n', file = htmloutfile, append = TRUE);
1176
970 # TE PE Cor 1177 # TE PE Cor
971 cat("<tr><td align=center>\n", file = htmloutfile, append = TRUE); 1178 cat("<tr><td align=center>\n", file = htmloutfile, append = TRUE);
972 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); 1179 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE);
973 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n', 1180 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n',
974 file = htmloutfile, append = TRUE); 1181 file = htmloutfile, append = TRUE);
975 cat('</td></table>', 1182 cat('</td></table>',
976 file = htmloutfile, append = TRUE); 1183 file = htmloutfile, append = TRUE);
977 1184
978 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n', 1185 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n',
979 file = htmloutfile, append = TRUE); 1186 file = htmloutfile, append = TRUE);
980 1187
981 # TE PE Regression 1188 # TE PE Regression
982 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE); 1189 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE);
1190 postscripts <- c(postscripts, c(extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$postscripts,
1191 extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$postscripts,
1192 extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$postscripts,
1193 extractWidgetCode(paste(outdir,"/PE_TE_lm_cooksd.png",sep="",collapse=""))$postscripts,
1194 extractWidgetCode(paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""))$postscripts,
1195 gsub('data-for="html', 'data-for="secondhtml"',
1196 extractWidgetCode(paste(outdir,"/TE_PE_scatter.png",sep="",collapse=""))$postscripts)));
983 1197
984 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n', 1198 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n',
985 file = htmloutfile, append = TRUE); 1199 file = htmloutfile, append = TRUE);
986 1200
987 #TE PE Heatmap 1201 #TE PE Heatmap
988 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust); 1202 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust);
1203 lines <- extractWidgetCode(paste(outdir,"/PE_TE_heatmap.png",sep="",collapse=""))
1204 postscripts <- c(postscripts, lines$postscripts)
1205 prescripts <- c(prescripts, lines$prescripts)
989 1206
990 #TE PE Clustering (kmeans) 1207 #TE PE Clustering (kmeans)
991 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster)) 1208 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster))
992 1209 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/PE_TE_kmeans.png",sep="",collapse=""))$postscripts);
993 } 1210 }
994 } 1211 }
995 cat("<h3>Go To:</h3>\n", 1212 cat("<h3>Go To:</h3>\n",
996 "<ul>\n", 1213 "<ul>\n",
997 "<li><a href=#sample_dist>Sample distribution</a></li>\n", 1214 "<li><a href=#sample_dist>Sample distribution</a></li>\n",
1000 "<li><a href=#inf_obs>Influential observations</a></li>\n", 1217 "<li><a href=#inf_obs>Influential observations</a></li>\n",
1001 "<li><a href=#cluster_data>Cluster analysis</a></li></ul>\n", 1218 "<li><a href=#cluster_data>Cluster analysis</a></li></ul>\n",
1002 "<br><a href=#>TOP</a>", 1219 "<br><a href=#>TOP</a>",
1003 file = htmloutfile, append = TRUE); 1220 file = htmloutfile, append = TRUE);
1004 cat("</body></html>\n", file = htmloutfile, append = TRUE); 1221 cat("</body></html>\n", file = htmloutfile, append = TRUE);
1222
1223
1224 #===============================================================================
1225 # Add masked-javascripts tags to HTML file in the head and end
1226 #===============================================================================
1227
1228 htmllines <- readLines(htmloutfile)
1229 htmllines[1] <- paste('<html>\n<head>\n', paste(prescripts, collapse='\n'), '\n</head>\n<body>')
1230 cat(paste(htmllines, collapse='\n'), file = htmloutfile)
1231 cat('\n', paste(postscripts, collapse='\n'), "\n",
1232 "</body>\n</html>\n", file = htmloutfile, append = TRUE);