Mercurial > repos > pravs > protein_rna_correlation
diff protein_rna_correlation.r @ 5:6bf0203ee17e draft
planemo upload
author | pravs |
---|---|
date | Wed, 20 Jun 2018 20:43:56 -0400 |
parents | 412403eec79c |
children | 8e9428eca82c |
line wrap: on
line diff
--- a/protein_rna_correlation.r Sun Jun 17 05:06:18 2018 -0400 +++ b/protein_rna_correlation.r Wed Jun 20 20:43:56 2018 -0400 @@ -80,8 +80,8 @@ # ............................SCRIPT STARTS FROM HERE ............................. #================================================================================== # Warning Off -oldw <- getOption("warn") -options(warn = -1) +# oldw <- getOption("warn") +# options(warn = -1) #============================================================================================================= # Functions #============================================================================================================= @@ -411,10 +411,10 @@ cat( "<font color='blue'><h3>Download mapped unmapped data</h3></font>", - "<ul><li>Protein mapped data: ", '<a href="',paste(outdir,"/",PE_outfile_mapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>", - "<li>Protein unmapped data: ", '<a href="',paste(outdir,"/",PE_outfile_unmapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>", - "<li>Transcript mapped data: ", '<a href="',paste(outdir,"/",GE_outfile_mapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>", - "<li>Transcript unmapped data: ", '<a href="',paste(outdir,"/",GE_outfile_unmapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>", + "<ul><li>Protein mapped data: ", '<a href="',paste(PE_outfile_mapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>", + "<li>Protein unmapped data: ", '<a href="',paste(PE_outfile_unmapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>", + "<li>Transcript mapped data: ", '<a href="',paste(GE_outfile_mapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>", + "<li>Transcript unmapped data: ", '<a href="',paste(GE_outfile_unmapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>", file = htmloutfile, append = TRUE); } @@ -422,8 +422,8 @@ write.table(GE_df[rowpair[,2], c(GE_idcolno,GE_expcolno)], file=paste(outdir,"/",GE_expfile,sep="",collapse=""), row.names=F, quote=F, sep="\t", eol="\n") cat( - "<li>Protein abundance data: ", '<a href="',paste(outdir,"/",PE_expfile,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>", - "<li>Transcript abundance data: ", '<a href="',paste(outdir,"/",GE_expfile,sep="",collapse=""),'" target="_blank"> Link</a>',"</li></ul>", + "<li>Protein abundance data: ", '<a href="',paste(PE_expfile,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>", + "<li>Transcript abundance data: ", '<a href="',paste(GE_expfile,sep="",collapse=""),'" target="_blank"> Link</a>',"</li></ul>", file = htmloutfile, append = TRUE); #========================================================================================== @@ -470,8 +470,8 @@ # Write excluded transcriptomics and proteomics data link to html file cat( - "<ul><li>Protein excluded data with NA or Inf or -Inf: ", '<a href="',paste(outdir,"/",PE_outfile_excluded_naInf,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>", - "<li>Transcript excluded data with NA or Inf or -Inf: ", '<a href="',paste(outdir,"/",GE_outfile_excluded_naInf,sep="",collapse=""),'" target="_blank"> Link</a>',"</li></ul>", + "<ul><li>Protein excluded data with NA or Inf or -Inf: ", '<a href="',paste(PE_outfile_excluded_naInf,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>", + "<li>Transcript excluded data with NA or Inf or -Inf: ", '<a href="',paste(GE_outfile_excluded_naInf,sep="",collapse=""),'" target="_blank"> Link</a>',"</li></ul>", file = htmloutfile, append = TRUE); # Keep the unexcluded entries only @@ -535,7 +535,7 @@ # Distribution of proteome and transcriptome abundance (Box and Density plot) #============================================================================================================= cat("Generating Box and Density plot\n",file=logfile, append=T); - outplot = paste("./",outdir,"/AbundancePlot.png",sep="",collapse=""); + outplot = paste(outdir,"/AbundancePlot.png",sep="",collapse=""); png(outplot); par(mfrow=c(2,2)); boxplot(proteome_df[,2], ylab="Abundance", main="Proteome abundance", cex.lab=1.5); @@ -553,7 +553,7 @@ # Scatter plot #============================================================================================================= cat("Generating scatter plot\n",file=logfile, append=T); - outplot = paste("./",outdir,"/AbundancePlot_scatter.png",sep="",collapse=""); + outplot = paste(outdir,"/AbundancePlot_scatter.png",sep="",collapse=""); png(outplot); par(mfrow=c(1,1)); scatter.smooth(transcriptome_df[,2], proteome_df[,2], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5); @@ -620,7 +620,7 @@ #============================================================================================================= # Plotting various regression diagnostics plots #============================================================================================================= - outplot1 = paste("./",outdir,"/PE_GE_modelfit.pdf",sep="",collapse=""); + outplot1 = paste(outdir,"/PE_GE_modelfit.pdf",sep="",collapse=""); pdf(outplot1); devnum = which(unlist(sapply(2:length(.Devices), function(x){attributes(.Devices[[x]])$filepath==outplot1})))+1 print(.Devices) @@ -635,31 +635,31 @@ "<font color='blue'><h3>Plotting various regression diagnostics plots</h3></font>\n", file = htmloutfile, append = TRUE); - outplot = paste("./",outdir,"/PE_GE_lm_1.png",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_lm_1.png",sep="",collapse=""); png(outplot); par(mfrow=c(1,1)); plot(regmodel, 1, cex.lab=1.5); dev.off(); - outplot = paste("./",outdir,"/PE_GE_lm_2.png",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_lm_2.png",sep="",collapse=""); png(outplot); par(mfrow=c(1,1)); plot(regmodel, 2, cex.lab=1.5); dev.off(); - outplot = paste("./",outdir,"/PE_GE_lm_3.png",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_lm_3.png",sep="",collapse=""); png(outplot); par(mfrow=c(1,1)); plot(regmodel, 3, cex.lab=1.5); dev.off(); - outplot = paste("./",outdir,"/PE_GE_lm_5.png",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_lm_5.png",sep="",collapse=""); png(outplot); par(mfrow=c(1,1)); plot(regmodel, 5, cex.lab=1.5); dev.off(); - outplot = paste("./",outdir,"/PE_GE_lm.pdf",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_lm.pdf",sep="",collapse=""); pdf(outplot); plot(regmodel); dev.off(); @@ -704,7 +704,7 @@ cooksd <- cooks.distance(regmodel); cat("Generating cooksd plot\n",file=logfile, append=T); - outplot = paste("./",outdir,"/PE_GE_lm_cooksd.png",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_lm_cooksd.png",sep="",collapse=""); png(outplot); par(mfrow=c(1,1)); plot(cooksd, pch="*", cex=2, cex.lab=1.5,main="Influential Obs. by Cooks distance", ylab="Cook\'s distance", xlab="Observations") # plot cooks distance @@ -737,7 +737,7 @@ cat("Writing influential observations\n",file=logfile, append=T); - outdatafile = paste("./",outdir,"/PE_GE_influential_observation.tsv", sep="", collapse=""); + outdatafile = paste(outdir,"/PE_GE_influential_observation.tsv", sep="", collapse=""); cat('<a href="',outdatafile, '" target="_blank">Download entire list</a>',file = htmloutfile, append = TRUE); write.table(PE_GE_data_outlier, file=outdatafile, row.names=F, sep="\t", quote=F); @@ -764,7 +764,7 @@ #============================================================================================================= # Scatter plot #============================================================================================================= - outplot = paste("./",outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""); + outplot = paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""); png(outplot); par(mfrow=c(1,1)); scatter.smooth(PE_GE_data_no_outlier[,"GE_abundance"], PE_GE_data_no_outlier[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5); @@ -806,10 +806,12 @@ file = htmloutfile, append = TRUE); cat("Generating heatmap plot\n",file=logfile, append=T); - outplot = paste("./",outdir,"/PE_GE_heatmap.png",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_heatmap.png",sep="",collapse=""); png(outplot); par(mfrow=c(1,1)); - heatmap.2(as.matrix(PE_GE_data[,c("PE_abundance","GE_abundance")]), trace="none", cexCol=1, col=greenred(100),Colv=F, labCol=c("PE","GE"), scale="col"); + #heatmap.2(as.matrix(PE_GE_data[,c("PE_abundance","GE_abundance")]), trace="none", cexCol=1, col=greenred(100),Colv=F, labCol=c("PE","GE"), scale="col"); + my_palette <- colorRampPalette(c("green", "white", "red"))(299); + heatmap.2(as.matrix(PE_GE_data[,c("PE_abundance","GE_abundance")]), trace="none", cexCol=1, col=my_palette ,Colv=F, labCol=c("PE","GE"), scale="col", dendrogram = "row"); dev.off(); cat( @@ -825,7 +827,7 @@ k1 = kmeans(PE_GE_data_kdata[,c("PE_abundance","GE_abundance")], 5); cat("Generating kmeans plot\n",file=logfile, append=T); - outplot = paste("./",outdir,"/PE_GE_kmeans.png",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_kmeans.png",sep="",collapse=""); png(outplot); par(mfrow=c(1,1)); scatter.smooth(PE_GE_data_kdata[,"GE_abundance"], PE_GE_data_kdata[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5); @@ -864,7 +866,7 @@ # Linear regression with removal of outliers regmodel_no_outlier = lm(PE_abundance~GE_abundance, data=PE_GE_data_no_outlier); regmodel_no_outlier_predictedy = predict(regmodel_no_outlier, PE_GE_data_no_outlier); - outplot = paste("./",outdir,"/PE_GE_lm_without_outliers.pdf",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_lm_without_outliers.pdf",sep="",collapse=""); plot(PE_GE_data_no_outlier[,"GE_abundance"], PE_GE_data_no_outlier[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Linear regression with removal of outliers"); points(PE_GE_data_no_outlier[,"GE_abundance"], regmodel_no_outlier_predictedy, col="red"); @@ -876,7 +878,7 @@ # Resistant regression (lqs / least trimmed squares method) regmodel_lqs = lqs(PE_abundance~GE_abundance, data=PE_GE_data); regmodel_lqs_predictedy = predict(regmodel_lqs, PE_GE_data); - outplot = paste("./",outdir,"/PE_GE_lqs.pdf",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_lqs.pdf",sep="",collapse=""); pdf(outplot); plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Resistant regression (lqs / least trimmed squares method)"); points(PE_GE_data[,"GE_abundance"], regmodel_lqs_predictedy, col="red"); @@ -890,7 +892,7 @@ # Robust regression (rlm / Huber M-estimator method) regmodel_rlm = rlm(PE_abundance~GE_abundance, data=PE_GE_data); regmodel_rlm_predictedy = predict(regmodel_rlm, PE_GE_data); - outplot = paste("./",outdir,"/PE_GE_rlm.pdf",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_rlm.pdf",sep="",collapse=""); plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Robust regression (rlm / Huber M-estimator method)"); points(PE_GE_data[,"GE_abundance"], regmodel_rlm_predictedy, col="red"); pdf(outplot); @@ -915,7 +917,7 @@ regmodel_poly5_predictedy = predict(regmodel_poly5, PE_GE_data); regmodel_poly6_predictedy = predict(regmodel_poly6, PE_GE_data); - outplot = paste("./",outdir,"/PE_GE_poly2.pdf",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_poly2.pdf",sep="",collapse=""); dev.set(devnum); plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Polynomial regression with degree 2"); points(PE_GE_data[,"GE_abundance"], regmodel_poly2_predictedy, col="red"); @@ -923,7 +925,7 @@ plot(regmodel_poly2); dev.off(); - outplot = paste("./",outdir,"/PE_GE_poly3.pdf",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_poly3.pdf",sep="",collapse=""); dev.set(devnum); plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Polynomial regression with degree 3"); points(PE_GE_data[,"GE_abundance"], regmodel_poly3_predictedy, col="red"); @@ -931,7 +933,7 @@ plot(regmodel_poly3); dev.off(); - outplot = paste("./",outdir,"/PE_GE_poly4.pdf",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_poly4.pdf",sep="",collapse=""); dev.set(devnum); plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Polynomial regression with degree 4"); points(PE_GE_data[,"GE_abundance"], regmodel_poly4_predictedy, col="red"); @@ -939,7 +941,7 @@ plot(regmodel_poly4); dev.off(); - outplot = paste("./",outdir,"/PE_GE_poly5.pdf",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_poly5.pdf",sep="",collapse=""); dev.set(devnum); plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Polynomial regression with degree 5"); points(PE_GE_data[,"GE_abundance"], regmodel_poly5_predictedy, col="red"); @@ -947,7 +949,7 @@ plot(regmodel_poly5); dev.off(); - outplot = paste("./",outdir,"/PE_GE_poly6.pdf",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_poly6.pdf",sep="",collapse=""); dev.set(devnum); plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Polynomial regression with degree 6"); points(PE_GE_data[,"GE_abundance"], regmodel_poly6_predictedy, col="red"); @@ -960,7 +962,7 @@ regmodel_gam_predictedy = predict(regmodel_gam, PE_GE_data); regmodel_gam_metrics = regr.eval(PE_GE_data$PE_abundance, regmodel_gam$fitted.values) - outplot = paste("./",outdir,"/PE_GE_gam.pdf",sep="",collapse=""); + outplot = paste(outdir,"/PE_GE_gam.pdf",sep="",collapse=""); dev.set(devnum); plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Generalized additive models"); points(PE_GE_data[,"GE_abundance"], regmodel_gam_predictedy, col="red"); @@ -1019,7 +1021,7 @@ # Warning On - options(warn = oldw) + # options(warn = oldw) \ No newline at end of file