comparison limma_voom.R @ 14:3133e833b3ce draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit c915bd7e68cb3a2944397aaf184c2b1db97cb3a5
author iuc
date Fri, 04 Jan 2019 04:11:56 -0500
parents d5a940112511
children 5d903d528193
comparison
equal deleted inserted replaced
13:d5a940112511 14:3133e833b3ce
303 countsRows <- nrow(counts) 303 countsRows <- nrow(counts)
304 304
305 # Process factors 305 # Process factors
306 if (is.null(opt$factInput)) { 306 if (is.null(opt$factInput)) {
307 factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE) 307 factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE)
308 # order samples as in counts matrix
309 factorData <- factorData[match(colnames(counts), factorData[, 1]), ]
308 factors <- factorData[, -1, drop=FALSE] 310 factors <- factorData[, -1, drop=FALSE]
309 } else { 311 } else {
310 factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE)) 312 factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE))
311 factorData <- list() 313 factorData <- list()
312 for (fact in factors) { 314 for (fact in factors) {
423 keep <- rowSums(data$counts) >= opt$cntReq 425 keep <- rowSums(data$counts) >= opt$cntReq
424 } else if (filtSmpCount) { 426 } else if (filtSmpCount) {
425 keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq 427 keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
426 } else if (filtCPM) { 428 } else if (filtCPM) {
427 myCPM <- cpm(data$counts) 429 myCPM <- cpm(data$counts)
428 thresh <- myCPM >= opt$cpmReq 430 thresh <- myCPM >= opt$cpmReq
429 keep <- rowSums(thresh) >= opt$sampleReq 431 keep <- rowSums(thresh) >= opt$sampleReq
430 432
431 if ("c" %in% plots) { 433 if ("c" %in% plots) {
432 # Plot CPM vs raw counts (to check threshold) 434 # Plot CPM vs raw counts (to check threshold)
433 pdf(cpmOutPdf, width=6.5, height=10) 435 pdf(cpmOutPdf, width=6.5, height=10)
641 linkName <- paste0("MDSPlot_", names(factors)[1], ".pdf") 643 linkName <- paste0("MDSPlot_", names(factors)[1], ".pdf")
642 linkAddr <- "mdsscree.pdf" 644 linkAddr <- "mdsscree.pdf"
643 linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE)) 645 linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
644 invisible(dev.off()) 646 invisible(dev.off())
645 647
648 # generate Glimma interactive MDS Plot
649 if ("i" %in% plots) {
650 Glimma::glMDSPlot(y, labels=samplenames, groups=factors[, 1],
651 folder="glimma_MDS", launch=FALSE)
652 linkName <- "Glimma_MDSPlot.html"
653 linkAddr <- "glimma_MDS/MDS-Plot.html"
654 linkData <- rbind(linkData, c(linkName, linkAddr))
655 }
656
646 if ("x" %in% plots) { 657 if ("x" %in% plots) {
647 png(mdsxOutPng, width=1000, height=500) 658 png(mdsxOutPng, width=1000, height=500)
648 par(mfrow=c(1, 2)) 659 par(mfrow=c(1, 2))
649 for (i in 2:3) { 660 for (i in 2:3) {
650 dim1 <- i 661 dim1 <- i
698 if (wantRobust) { 709 if (wantRobust) {
699 fit <- eBayes(fit, trend=TRUE, robust=TRUE) 710 fit <- eBayes(fit, trend=TRUE, robust=TRUE)
700 } else { 711 } else {
701 fit <- eBayes(fit, trend=TRUE, robust=FALSE) 712 fit <- eBayes(fit, trend=TRUE, robust=FALSE)
702 } 713 }
703 # plot fit with plotSA
704 saOutPng <- makeOut("saplot.png")
705 saOutPdf <- makeOut("saplot.pdf")
706
707 png(saOutPng, width=500, height=500)
708 plotSA(fit, main="SA Plot")
709 imgName <- "SAPlot.png"
710 imgAddr <- "saplot.png"
711 imageData <- rbind(imageData, c(imgName, imgAddr))
712 invisible(dev.off())
713
714 pdf(saOutPdf, width=14)
715 plotSA(fit, main="SA Plot")
716 linkName <- "SAPlot.pdf"
717 linkAddr <- "saplot.pdf"
718 linkData <- rbind(linkData, c(linkName, linkAddr))
719 invisible(dev.off())
720 714
721 plotData <- logCPM 715 plotData <- logCPM
722 716
723 # Save normalised counts (log2cpm) 717 # Save normalised counts (log2cpm)
724 if (wantNorm) { 718 if (wantNorm) {
725 write.table(logCPM, file=normOut, row.names=TRUE, sep="\t", quote=FALSE) 719 write.table(logCPM, file=normOut, row.names=TRUE, sep="\t", quote=FALSE)
726 linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts")))) 720 linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts"))))
727 } 721 }
728 } else { 722 } else {
729 # limma-voom approach 723 # limma-voom approach
730 voomOutPdf <- makeOut("voomplot.pdf")
731 voomOutPng <- makeOut("voomplot.png")
732 724
733 if (wantWeight) { 725 if (wantWeight) {
726 voomWtsOutPdf <- makeOut("voomwtsplot.pdf")
727 voomWtsOutPng <- makeOut("voomwtsplot.png")
734 # Creating voom data object and plot 728 # Creating voom data object and plot
735 png(voomOutPng, width=1000, height=500) 729 png(voomWtsOutPng, width=1000, height=500)
736 vData <- voomWithQualityWeights(y, design=design, plot=TRUE) 730 vData <- voomWithQualityWeights(y, design=design, plot=TRUE)
737 imgName <- "VoomPlot.png" 731 imgName <- "VoomWithQualityWeightsPlot.png"
738 imgAddr <- "voomplot.png" 732 imgAddr <- "voomwtsplot.png"
739 imageData <- rbind(imageData, c(imgName, imgAddr)) 733 imageData <- rbind(imageData, c(imgName, imgAddr))
740 invisible(dev.off()) 734 invisible(dev.off())
741 735
742 pdf(voomOutPdf, width=14) 736 pdf(voomWtsOutPdf, width=14)
743 vData <- voomWithQualityWeights(y, design=design, plot=TRUE) 737 vData <- voomWithQualityWeights(y, design=design, plot=TRUE)
744 linkName <- "VoomPlot.pdf" 738 linkName <- "VoomWithQualityWeightsPlot.pdf"
745 linkAddr <- "voomplot.pdf" 739 linkAddr <- "voomwtsplot.pdf"
746 linkData <- rbind(linkData, c(linkName, linkAddr)) 740 linkData <- rbind(linkData, c(linkName, linkAddr))
747 invisible(dev.off()) 741 invisible(dev.off())
748 742
749 # Generating fit data and top table with weights 743 # Generating fit data and top table with weights
750 wts <- vData$weights 744 wts <- vData$weights
751 voomFit <- lmFit(vData, design, weights=wts) 745 voomFit <- lmFit(vData, design, weights=wts)
752 746
753 } else { 747 } else {
748 voomOutPdf <- makeOut("voomplot.pdf")
749 voomOutPng <- makeOut("voomplot.png")
754 # Creating voom data object and plot 750 # Creating voom data object and plot
755 png(voomOutPng, width=500, height=500) 751 png(voomOutPng, width=500, height=500)
756 vData <- voom(y, design=design, plot=TRUE) 752 vData <- voom(y, design=design, plot=TRUE)
757 imgName <- "VoomPlot" 753 imgName <- "VoomPlot"
758 imgAddr <- "voomplot.png" 754 imgAddr <- "voomplot.png"
784 } else { 780 } else {
785 fit <- eBayes(voomFit, robust=FALSE) 781 fit <- eBayes(voomFit, robust=FALSE)
786 } 782 }
787 plotData <- vData 783 plotData <- vData
788 } 784 }
785
786 # plot final model mean-variance trend with plotSA
787 saOutPng <- makeOut("saplot.png")
788 saOutPdf <- makeOut("saplot.pdf")
789
790 png(saOutPng, width=500, height=500)
791 plotSA(fit, main="Final model: Mean-variance trend (SA Plot)")
792 imgName <- "SAPlot.png"
793 imgAddr <- "saplot.png"
794 imageData <- rbind(imageData, c(imgName, imgAddr))
795 invisible(dev.off())
796
797 pdf(saOutPdf)
798 plotSA(fit, main="Final model: Mean-variance trend (SA Plot)")
799 linkName <- "SAPlot.pdf"
800 linkAddr <- "saplot.pdf"
801 linkData <- rbind(linkData, c(linkName, linkAddr))
802 invisible(dev.off())
789 803
790 # Save library size info 804 # Save library size info
791 if (wantLibinfo) { 805 if (wantLibinfo) {
792 efflibsize <- round(y$samples$lib.size * y$samples$norm.factors) 806 efflibsize <- round(y$samples$lib.size * y$samples$norm.factors)
793 libsizeinfo <- cbind(y$samples, EffectiveLibrarySize=efflibsize) 807 libsizeinfo <- cbind(y$samples, EffectiveLibrarySize=efflibsize)
842 linkName <- paste0("MDPlot_", con, ".pdf") 856 linkName <- paste0("MDPlot_", con, ".pdf")
843 linkAddr <- paste0("mdplot_", con, ".pdf") 857 linkAddr <- paste0("mdplot_", con, ".pdf")
844 linkData <- rbind(linkData, c(linkName, linkAddr)) 858 linkData <- rbind(linkData, c(linkName, linkAddr))
845 invisible(dev.off()) 859 invisible(dev.off())
846 860
847 # Generate Glimma interactive MD plot and table, requires annotation file (assumes gene labels/symbols in 2nd column) 861 # Generate Glimma interactive Volcano, MD plot and tables, requires annotation file (assumes gene labels/symbols in 2nd column)
848 if (haveAnno) { 862 if ("i" %in% plots & haveAnno) {
849 # make gene labels unique to handle NAs 863 # make gene labels unique to handle NAs
850 geneanno <- y$genes 864 geneanno <- y$genes
851 geneanno[, 2] <- make.unique(geneanno[, 2]) 865 geneanno[, 2] <- make.unique(geneanno[, 2])
866
867 # MD plot
852 Glimma::glMDPlot(fit, coef=i, counts=y$counts, anno=geneanno, groups=factors[, 1], 868 Glimma::glMDPlot(fit, coef=i, counts=y$counts, anno=geneanno, groups=factors[, 1],
853 status=status[, i], sample.cols=col.group, 869 status=status[, i], sample.cols=col.group,
854 main=paste("MD Plot:", unmake.names(con)), side.main=colnames(y$genes)[2], 870 main=paste("MD Plot:", unmake.names(con)), side.main=colnames(y$genes)[2],
855 folder=paste0("glimma_", unmake.names(con)), launch=FALSE) 871 folder=paste0("glimma_", unmake.names(con)), launch=FALSE)
856 linkName <- paste0("Glimma_MDPlot_", con, ".html") 872 linkName <- paste0("Glimma_MDPlot_", con, ".html")
857 linkAddr <- paste0("glimma_", con, "/MD-Plot.html") 873 linkAddr <- paste0("glimma_", con, "/MD-Plot.html")
874 linkData <- rbind(linkData, c(linkName, linkAddr))
875
876 # Volcano plot
877 Glimma::glXYPlot(x=fit$coefficients[, i], y=fit$lods[, i], counts=y$counts, anno=geneanno, groups=factors[, 1],
878 status=status[, i], sample.cols=col.group,
879 main=paste("Volcano Plot:", unmake.names(con)), side.main=colnames(y$genes)[2],
880 xlab="logFC", ylab="logodds",
881 folder=paste0("glimma_volcano_", unmake.names(con)), launch=FALSE)
882 linkName <- paste0("Glimma_VolcanoPlot_", con, ".html")
883 linkAddr <- paste0("glimma_volcano_", con, "/XY-Plot.html")
858 linkData <- rbind(linkData, c(linkName, linkAddr)) 884 linkData <- rbind(linkData, c(linkName, linkAddr))
859 } 885 }
860 886
861 # Plot Volcano 887 # Plot Volcano
862 pdf(volOutPdf[i]) 888 pdf(volOutPdf[i])
940 pval <- round(top[topgenes[j], "adj.P.Val"], 5) 966 pval <- round(top[topgenes[j], "adj.P.Val"], 5)
941 if (wantTrend) { 967 if (wantTrend) {
942 stripchart(plotData[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols, 968 stripchart(plotData[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols,
943 method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval)) 969 method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval))
944 } else { 970 } else {
945 stripchart(plotData$E[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols, 971 stripchart(plotData$E[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols,
946 method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval)) 972 method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval))
947 } 973 }
948 } 974 }
949 linkName <- paste0("Stripcharts_", con, ".pdf") 975 linkName <- paste0("Stripcharts_", con, ".pdf")
950 linkAddr <- paste0("stripcharts_", con, ".pdf") 976 linkAddr <- paste0("stripcharts_", con, ".pdf")
988 cata("<body>\n") 1014 cata("<body>\n")
989 cata("<h3>Limma Analysis Output:</h3>\n") 1015 cata("<h3>Limma Analysis Output:</h3>\n")
990 cata("Links to PDF copies of plots are in 'Plots' section below <br />\n") 1016 cata("Links to PDF copies of plots are in 'Plots' section below <br />\n")
991 1017
992 for (i in 1:nrow(imageData)) { 1018 for (i in 1:nrow(imageData)) {
993 if (grepl("density|box|mds|mdvol", imageData$Link[i])) { 1019 if (grepl("density|box|mds|mdvol|wts", imageData$Link[i])) {
994 HtmlImage(imageData$Link[i], imageData$Label[i], width=1000)
995 } else if (wantWeight) {
996 HtmlImage(imageData$Link[i], imageData$Label[i], width=1000) 1020 HtmlImage(imageData$Link[i], imageData$Label[i], width=1000)
997 } else { 1021 } else {
998 HtmlImage(imageData$Link[i], imageData$Label[i]) 1022 HtmlImage(imageData$Link[i], imageData$Label[i])
999 } 1023 }
1000 } 1024 }
1019 cata("</table>") 1043 cata("</table>")
1020 1044
1021 cata("<h4>Plots:</h4>\n") 1045 cata("<h4>Plots:</h4>\n")
1022 #PDFs 1046 #PDFs
1023 for (i in 1:nrow(linkData)) { 1047 for (i in 1:nrow(linkData)) {
1024 if (grepl("density|cpm|boxplot|mds|mdplots|voomplot|saplot", linkData$Link[i])) { 1048 if (grepl("density|cpm|boxplot|mds|mdplots|voom|saplot", linkData$Link[i])) {
1025 HtmlLink(linkData$Link[i], linkData$Label[i]) 1049 HtmlLink(linkData$Link[i], linkData$Label[i])
1026 } 1050 }
1027 } 1051 }
1028 1052
1029 for (i in 1:nrow(linkData)) { 1053 for (i in 1:nrow(linkData)) {
1066 HtmlLink(linkData$Link[i], linkData$Label[i]) 1090 HtmlLink(linkData$Link[i], linkData$Label[i])
1067 } 1091 }
1068 } 1092 }
1069 } 1093 }
1070 1094
1071 cata("<h4>Glimma Interactive Results:</h4>\n") 1095 if ("i" %in% plots) {
1072 for (i in 1:nrow(linkData)) { 1096 cata("<h4>Glimma Interactive Results:</h4>\n")
1073 if (grepl("glimma", linkData$Link[i])) { 1097 for (i in 1:nrow(linkData)) {
1074 HtmlLink(linkData$Link[i], linkData$Label[i]) 1098 if (grepl("glimma", linkData$Link[i])) {
1075 } 1099 HtmlLink(linkData$Link[i], linkData$Label[i])
1076 } 1100 }
1101 }
1102 }
1077 1103
1078 cata("<p>Alt-click links to download file.</p>\n") 1104 cata("<p>Alt-click links to download file.</p>\n")
1079 cata("<p>Click floppy disc icon associated history item to download ") 1105 cata("<p>Click floppy disc icon associated history item to download ")
1080 cata("all files.</p>\n") 1106 cata("all files.</p>\n")
1081 cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n") 1107 cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n")