Mercurial > repos > iuc > limma_voom
comparison limma_voom.R @ 14:3133e833b3ce draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit c915bd7e68cb3a2944397aaf184c2b1db97cb3a5
author | iuc |
---|---|
date | Fri, 04 Jan 2019 04:11:56 -0500 |
parents | d5a940112511 |
children | 5d903d528193 |
comparison
equal
deleted
inserted
replaced
13:d5a940112511 | 14:3133e833b3ce |
---|---|
303 countsRows <- nrow(counts) | 303 countsRows <- nrow(counts) |
304 | 304 |
305 # Process factors | 305 # Process factors |
306 if (is.null(opt$factInput)) { | 306 if (is.null(opt$factInput)) { |
307 factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE) | 307 factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE) |
308 # order samples as in counts matrix | |
309 factorData <- factorData[match(colnames(counts), factorData[, 1]), ] | |
308 factors <- factorData[, -1, drop=FALSE] | 310 factors <- factorData[, -1, drop=FALSE] |
309 } else { | 311 } else { |
310 factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE)) | 312 factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE)) |
311 factorData <- list() | 313 factorData <- list() |
312 for (fact in factors) { | 314 for (fact in factors) { |
423 keep <- rowSums(data$counts) >= opt$cntReq | 425 keep <- rowSums(data$counts) >= opt$cntReq |
424 } else if (filtSmpCount) { | 426 } else if (filtSmpCount) { |
425 keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq | 427 keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq |
426 } else if (filtCPM) { | 428 } else if (filtCPM) { |
427 myCPM <- cpm(data$counts) | 429 myCPM <- cpm(data$counts) |
428 thresh <- myCPM >= opt$cpmReq | 430 thresh <- myCPM >= opt$cpmReq |
429 keep <- rowSums(thresh) >= opt$sampleReq | 431 keep <- rowSums(thresh) >= opt$sampleReq |
430 | 432 |
431 if ("c" %in% plots) { | 433 if ("c" %in% plots) { |
432 # Plot CPM vs raw counts (to check threshold) | 434 # Plot CPM vs raw counts (to check threshold) |
433 pdf(cpmOutPdf, width=6.5, height=10) | 435 pdf(cpmOutPdf, width=6.5, height=10) |
641 linkName <- paste0("MDSPlot_", names(factors)[1], ".pdf") | 643 linkName <- paste0("MDSPlot_", names(factors)[1], ".pdf") |
642 linkAddr <- "mdsscree.pdf" | 644 linkAddr <- "mdsscree.pdf" |
643 linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE)) | 645 linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE)) |
644 invisible(dev.off()) | 646 invisible(dev.off()) |
645 | 647 |
648 # generate Glimma interactive MDS Plot | |
649 if ("i" %in% plots) { | |
650 Glimma::glMDSPlot(y, labels=samplenames, groups=factors[, 1], | |
651 folder="glimma_MDS", launch=FALSE) | |
652 linkName <- "Glimma_MDSPlot.html" | |
653 linkAddr <- "glimma_MDS/MDS-Plot.html" | |
654 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
655 } | |
656 | |
646 if ("x" %in% plots) { | 657 if ("x" %in% plots) { |
647 png(mdsxOutPng, width=1000, height=500) | 658 png(mdsxOutPng, width=1000, height=500) |
648 par(mfrow=c(1, 2)) | 659 par(mfrow=c(1, 2)) |
649 for (i in 2:3) { | 660 for (i in 2:3) { |
650 dim1 <- i | 661 dim1 <- i |
698 if (wantRobust) { | 709 if (wantRobust) { |
699 fit <- eBayes(fit, trend=TRUE, robust=TRUE) | 710 fit <- eBayes(fit, trend=TRUE, robust=TRUE) |
700 } else { | 711 } else { |
701 fit <- eBayes(fit, trend=TRUE, robust=FALSE) | 712 fit <- eBayes(fit, trend=TRUE, robust=FALSE) |
702 } | 713 } |
703 # plot fit with plotSA | |
704 saOutPng <- makeOut("saplot.png") | |
705 saOutPdf <- makeOut("saplot.pdf") | |
706 | |
707 png(saOutPng, width=500, height=500) | |
708 plotSA(fit, main="SA Plot") | |
709 imgName <- "SAPlot.png" | |
710 imgAddr <- "saplot.png" | |
711 imageData <- rbind(imageData, c(imgName, imgAddr)) | |
712 invisible(dev.off()) | |
713 | |
714 pdf(saOutPdf, width=14) | |
715 plotSA(fit, main="SA Plot") | |
716 linkName <- "SAPlot.pdf" | |
717 linkAddr <- "saplot.pdf" | |
718 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
719 invisible(dev.off()) | |
720 | 714 |
721 plotData <- logCPM | 715 plotData <- logCPM |
722 | 716 |
723 # Save normalised counts (log2cpm) | 717 # Save normalised counts (log2cpm) |
724 if (wantNorm) { | 718 if (wantNorm) { |
725 write.table(logCPM, file=normOut, row.names=TRUE, sep="\t", quote=FALSE) | 719 write.table(logCPM, file=normOut, row.names=TRUE, sep="\t", quote=FALSE) |
726 linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts")))) | 720 linkData <- rbind(linkData, c((paste0(deMethod, "_", "normcounts.tsv")), (paste0(deMethod, "_", "normcounts")))) |
727 } | 721 } |
728 } else { | 722 } else { |
729 # limma-voom approach | 723 # limma-voom approach |
730 voomOutPdf <- makeOut("voomplot.pdf") | |
731 voomOutPng <- makeOut("voomplot.png") | |
732 | 724 |
733 if (wantWeight) { | 725 if (wantWeight) { |
726 voomWtsOutPdf <- makeOut("voomwtsplot.pdf") | |
727 voomWtsOutPng <- makeOut("voomwtsplot.png") | |
734 # Creating voom data object and plot | 728 # Creating voom data object and plot |
735 png(voomOutPng, width=1000, height=500) | 729 png(voomWtsOutPng, width=1000, height=500) |
736 vData <- voomWithQualityWeights(y, design=design, plot=TRUE) | 730 vData <- voomWithQualityWeights(y, design=design, plot=TRUE) |
737 imgName <- "VoomPlot.png" | 731 imgName <- "VoomWithQualityWeightsPlot.png" |
738 imgAddr <- "voomplot.png" | 732 imgAddr <- "voomwtsplot.png" |
739 imageData <- rbind(imageData, c(imgName, imgAddr)) | 733 imageData <- rbind(imageData, c(imgName, imgAddr)) |
740 invisible(dev.off()) | 734 invisible(dev.off()) |
741 | 735 |
742 pdf(voomOutPdf, width=14) | 736 pdf(voomWtsOutPdf, width=14) |
743 vData <- voomWithQualityWeights(y, design=design, plot=TRUE) | 737 vData <- voomWithQualityWeights(y, design=design, plot=TRUE) |
744 linkName <- "VoomPlot.pdf" | 738 linkName <- "VoomWithQualityWeightsPlot.pdf" |
745 linkAddr <- "voomplot.pdf" | 739 linkAddr <- "voomwtsplot.pdf" |
746 linkData <- rbind(linkData, c(linkName, linkAddr)) | 740 linkData <- rbind(linkData, c(linkName, linkAddr)) |
747 invisible(dev.off()) | 741 invisible(dev.off()) |
748 | 742 |
749 # Generating fit data and top table with weights | 743 # Generating fit data and top table with weights |
750 wts <- vData$weights | 744 wts <- vData$weights |
751 voomFit <- lmFit(vData, design, weights=wts) | 745 voomFit <- lmFit(vData, design, weights=wts) |
752 | 746 |
753 } else { | 747 } else { |
748 voomOutPdf <- makeOut("voomplot.pdf") | |
749 voomOutPng <- makeOut("voomplot.png") | |
754 # Creating voom data object and plot | 750 # Creating voom data object and plot |
755 png(voomOutPng, width=500, height=500) | 751 png(voomOutPng, width=500, height=500) |
756 vData <- voom(y, design=design, plot=TRUE) | 752 vData <- voom(y, design=design, plot=TRUE) |
757 imgName <- "VoomPlot" | 753 imgName <- "VoomPlot" |
758 imgAddr <- "voomplot.png" | 754 imgAddr <- "voomplot.png" |
784 } else { | 780 } else { |
785 fit <- eBayes(voomFit, robust=FALSE) | 781 fit <- eBayes(voomFit, robust=FALSE) |
786 } | 782 } |
787 plotData <- vData | 783 plotData <- vData |
788 } | 784 } |
785 | |
786 # plot final model mean-variance trend with plotSA | |
787 saOutPng <- makeOut("saplot.png") | |
788 saOutPdf <- makeOut("saplot.pdf") | |
789 | |
790 png(saOutPng, width=500, height=500) | |
791 plotSA(fit, main="Final model: Mean-variance trend (SA Plot)") | |
792 imgName <- "SAPlot.png" | |
793 imgAddr <- "saplot.png" | |
794 imageData <- rbind(imageData, c(imgName, imgAddr)) | |
795 invisible(dev.off()) | |
796 | |
797 pdf(saOutPdf) | |
798 plotSA(fit, main="Final model: Mean-variance trend (SA Plot)") | |
799 linkName <- "SAPlot.pdf" | |
800 linkAddr <- "saplot.pdf" | |
801 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
802 invisible(dev.off()) | |
789 | 803 |
790 # Save library size info | 804 # Save library size info |
791 if (wantLibinfo) { | 805 if (wantLibinfo) { |
792 efflibsize <- round(y$samples$lib.size * y$samples$norm.factors) | 806 efflibsize <- round(y$samples$lib.size * y$samples$norm.factors) |
793 libsizeinfo <- cbind(y$samples, EffectiveLibrarySize=efflibsize) | 807 libsizeinfo <- cbind(y$samples, EffectiveLibrarySize=efflibsize) |
842 linkName <- paste0("MDPlot_", con, ".pdf") | 856 linkName <- paste0("MDPlot_", con, ".pdf") |
843 linkAddr <- paste0("mdplot_", con, ".pdf") | 857 linkAddr <- paste0("mdplot_", con, ".pdf") |
844 linkData <- rbind(linkData, c(linkName, linkAddr)) | 858 linkData <- rbind(linkData, c(linkName, linkAddr)) |
845 invisible(dev.off()) | 859 invisible(dev.off()) |
846 | 860 |
847 # Generate Glimma interactive MD plot and table, requires annotation file (assumes gene labels/symbols in 2nd column) | 861 # Generate Glimma interactive Volcano, MD plot and tables, requires annotation file (assumes gene labels/symbols in 2nd column) |
848 if (haveAnno) { | 862 if ("i" %in% plots & haveAnno) { |
849 # make gene labels unique to handle NAs | 863 # make gene labels unique to handle NAs |
850 geneanno <- y$genes | 864 geneanno <- y$genes |
851 geneanno[, 2] <- make.unique(geneanno[, 2]) | 865 geneanno[, 2] <- make.unique(geneanno[, 2]) |
866 | |
867 # MD plot | |
852 Glimma::glMDPlot(fit, coef=i, counts=y$counts, anno=geneanno, groups=factors[, 1], | 868 Glimma::glMDPlot(fit, coef=i, counts=y$counts, anno=geneanno, groups=factors[, 1], |
853 status=status[, i], sample.cols=col.group, | 869 status=status[, i], sample.cols=col.group, |
854 main=paste("MD Plot:", unmake.names(con)), side.main=colnames(y$genes)[2], | 870 main=paste("MD Plot:", unmake.names(con)), side.main=colnames(y$genes)[2], |
855 folder=paste0("glimma_", unmake.names(con)), launch=FALSE) | 871 folder=paste0("glimma_", unmake.names(con)), launch=FALSE) |
856 linkName <- paste0("Glimma_MDPlot_", con, ".html") | 872 linkName <- paste0("Glimma_MDPlot_", con, ".html") |
857 linkAddr <- paste0("glimma_", con, "/MD-Plot.html") | 873 linkAddr <- paste0("glimma_", con, "/MD-Plot.html") |
874 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
875 | |
876 # Volcano plot | |
877 Glimma::glXYPlot(x=fit$coefficients[, i], y=fit$lods[, i], counts=y$counts, anno=geneanno, groups=factors[, 1], | |
878 status=status[, i], sample.cols=col.group, | |
879 main=paste("Volcano Plot:", unmake.names(con)), side.main=colnames(y$genes)[2], | |
880 xlab="logFC", ylab="logodds", | |
881 folder=paste0("glimma_volcano_", unmake.names(con)), launch=FALSE) | |
882 linkName <- paste0("Glimma_VolcanoPlot_", con, ".html") | |
883 linkAddr <- paste0("glimma_volcano_", con, "/XY-Plot.html") | |
858 linkData <- rbind(linkData, c(linkName, linkAddr)) | 884 linkData <- rbind(linkData, c(linkName, linkAddr)) |
859 } | 885 } |
860 | 886 |
861 # Plot Volcano | 887 # Plot Volcano |
862 pdf(volOutPdf[i]) | 888 pdf(volOutPdf[i]) |
940 pval <- round(top[topgenes[j], "adj.P.Val"], 5) | 966 pval <- round(top[topgenes[j], "adj.P.Val"], 5) |
941 if (wantTrend) { | 967 if (wantTrend) { |
942 stripchart(plotData[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols, | 968 stripchart(plotData[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols, |
943 method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval)) | 969 method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval)) |
944 } else { | 970 } else { |
945 stripchart(plotData$E[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols, | 971 stripchart(plotData$E[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols, |
946 method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval)) | 972 method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval)) |
947 } | 973 } |
948 } | 974 } |
949 linkName <- paste0("Stripcharts_", con, ".pdf") | 975 linkName <- paste0("Stripcharts_", con, ".pdf") |
950 linkAddr <- paste0("stripcharts_", con, ".pdf") | 976 linkAddr <- paste0("stripcharts_", con, ".pdf") |
988 cata("<body>\n") | 1014 cata("<body>\n") |
989 cata("<h3>Limma Analysis Output:</h3>\n") | 1015 cata("<h3>Limma Analysis Output:</h3>\n") |
990 cata("Links to PDF copies of plots are in 'Plots' section below <br />\n") | 1016 cata("Links to PDF copies of plots are in 'Plots' section below <br />\n") |
991 | 1017 |
992 for (i in 1:nrow(imageData)) { | 1018 for (i in 1:nrow(imageData)) { |
993 if (grepl("density|box|mds|mdvol", imageData$Link[i])) { | 1019 if (grepl("density|box|mds|mdvol|wts", imageData$Link[i])) { |
994 HtmlImage(imageData$Link[i], imageData$Label[i], width=1000) | |
995 } else if (wantWeight) { | |
996 HtmlImage(imageData$Link[i], imageData$Label[i], width=1000) | 1020 HtmlImage(imageData$Link[i], imageData$Label[i], width=1000) |
997 } else { | 1021 } else { |
998 HtmlImage(imageData$Link[i], imageData$Label[i]) | 1022 HtmlImage(imageData$Link[i], imageData$Label[i]) |
999 } | 1023 } |
1000 } | 1024 } |
1019 cata("</table>") | 1043 cata("</table>") |
1020 | 1044 |
1021 cata("<h4>Plots:</h4>\n") | 1045 cata("<h4>Plots:</h4>\n") |
1022 #PDFs | 1046 #PDFs |
1023 for (i in 1:nrow(linkData)) { | 1047 for (i in 1:nrow(linkData)) { |
1024 if (grepl("density|cpm|boxplot|mds|mdplots|voomplot|saplot", linkData$Link[i])) { | 1048 if (grepl("density|cpm|boxplot|mds|mdplots|voom|saplot", linkData$Link[i])) { |
1025 HtmlLink(linkData$Link[i], linkData$Label[i]) | 1049 HtmlLink(linkData$Link[i], linkData$Label[i]) |
1026 } | 1050 } |
1027 } | 1051 } |
1028 | 1052 |
1029 for (i in 1:nrow(linkData)) { | 1053 for (i in 1:nrow(linkData)) { |
1066 HtmlLink(linkData$Link[i], linkData$Label[i]) | 1090 HtmlLink(linkData$Link[i], linkData$Label[i]) |
1067 } | 1091 } |
1068 } | 1092 } |
1069 } | 1093 } |
1070 | 1094 |
1071 cata("<h4>Glimma Interactive Results:</h4>\n") | 1095 if ("i" %in% plots) { |
1072 for (i in 1:nrow(linkData)) { | 1096 cata("<h4>Glimma Interactive Results:</h4>\n") |
1073 if (grepl("glimma", linkData$Link[i])) { | 1097 for (i in 1:nrow(linkData)) { |
1074 HtmlLink(linkData$Link[i], linkData$Label[i]) | 1098 if (grepl("glimma", linkData$Link[i])) { |
1075 } | 1099 HtmlLink(linkData$Link[i], linkData$Label[i]) |
1076 } | 1100 } |
1101 } | |
1102 } | |
1077 | 1103 |
1078 cata("<p>Alt-click links to download file.</p>\n") | 1104 cata("<p>Alt-click links to download file.</p>\n") |
1079 cata("<p>Click floppy disc icon associated history item to download ") | 1105 cata("<p>Click floppy disc icon associated history item to download ") |
1080 cata("all files.</p>\n") | 1106 cata("all files.</p>\n") |
1081 cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n") | 1107 cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n") |