# HG changeset patch # User iuc # Date 1546593116 18000 # Node ID 3133e833b3cebe6a8b1a1aabd38de73d941130d5 # Parent d5a9401125111f9ea036273276b5641d65d17e96 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/limma_voom commit c915bd7e68cb3a2944397aaf184c2b1db97cb3a5 diff -r d5a940112511 -r 3133e833b3ce limma_voom.R --- a/limma_voom.R Sun Sep 30 10:51:29 2018 -0400 +++ b/limma_voom.R Fri Jan 04 04:11:56 2019 -0500 @@ -305,6 +305,8 @@ # Process factors if (is.null(opt$factInput)) { factorData <- read.table(opt$factFile, header=TRUE, sep="\t", strip.white=TRUE) + # order samples as in counts matrix + factorData <- factorData[match(colnames(counts), factorData[, 1]), ] factors <- factorData[, -1, drop=FALSE] } else { factors <- unlist(strsplit(opt$factInput, "|", fixed=TRUE)) @@ -425,7 +427,7 @@ keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq } else if (filtCPM) { myCPM <- cpm(data$counts) - thresh <- myCPM >= opt$cpmReq + thresh <- myCPM >= opt$cpmReq keep <- rowSums(thresh) >= opt$sampleReq if ("c" %in% plots) { @@ -643,6 +645,15 @@ linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE)) invisible(dev.off()) +# generate Glimma interactive MDS Plot +if ("i" %in% plots) { + Glimma::glMDSPlot(y, labels=samplenames, groups=factors[, 1], + folder="glimma_MDS", launch=FALSE) + linkName <- "Glimma_MDSPlot.html" + linkAddr <- "glimma_MDS/MDS-Plot.html" + linkData <- rbind(linkData, c(linkName, linkAddr)) +} + if ("x" %in% plots) { png(mdsxOutPng, width=1000, height=500) par(mfrow=c(1, 2)) @@ -700,23 +711,6 @@ } else { fit <- eBayes(fit, trend=TRUE, robust=FALSE) } - # plot fit with plotSA - saOutPng <- makeOut("saplot.png") - saOutPdf <- makeOut("saplot.pdf") - - png(saOutPng, width=500, height=500) - plotSA(fit, main="SA Plot") - imgName <- "SAPlot.png" - imgAddr <- "saplot.png" - imageData <- rbind(imageData, c(imgName, imgAddr)) - invisible(dev.off()) - - pdf(saOutPdf, width=14) - plotSA(fit, main="SA Plot") - linkName <- "SAPlot.pdf" - linkAddr <- "saplot.pdf" - linkData <- rbind(linkData, c(linkName, linkAddr)) - invisible(dev.off()) plotData <- logCPM @@ -727,22 +721,22 @@ } } else { # limma-voom approach - voomOutPdf <- makeOut("voomplot.pdf") - voomOutPng <- makeOut("voomplot.png") if (wantWeight) { + voomWtsOutPdf <- makeOut("voomwtsplot.pdf") + voomWtsOutPng <- makeOut("voomwtsplot.png") # Creating voom data object and plot - png(voomOutPng, width=1000, height=500) + png(voomWtsOutPng, width=1000, height=500) vData <- voomWithQualityWeights(y, design=design, plot=TRUE) - imgName <- "VoomPlot.png" - imgAddr <- "voomplot.png" + imgName <- "VoomWithQualityWeightsPlot.png" + imgAddr <- "voomwtsplot.png" imageData <- rbind(imageData, c(imgName, imgAddr)) invisible(dev.off()) - pdf(voomOutPdf, width=14) + pdf(voomWtsOutPdf, width=14) vData <- voomWithQualityWeights(y, design=design, plot=TRUE) - linkName <- "VoomPlot.pdf" - linkAddr <- "voomplot.pdf" + linkName <- "VoomWithQualityWeightsPlot.pdf" + linkAddr <- "voomwtsplot.pdf" linkData <- rbind(linkData, c(linkName, linkAddr)) invisible(dev.off()) @@ -751,6 +745,8 @@ voomFit <- lmFit(vData, design, weights=wts) } else { + voomOutPdf <- makeOut("voomplot.pdf") + voomOutPng <- makeOut("voomplot.png") # Creating voom data object and plot png(voomOutPng, width=500, height=500) vData <- voom(y, design=design, plot=TRUE) @@ -787,6 +783,24 @@ plotData <- vData } +# plot final model mean-variance trend with plotSA +saOutPng <- makeOut("saplot.png") +saOutPdf <- makeOut("saplot.pdf") + +png(saOutPng, width=500, height=500) +plotSA(fit, main="Final model: Mean-variance trend (SA Plot)") +imgName <- "SAPlot.png" +imgAddr <- "saplot.png" +imageData <- rbind(imageData, c(imgName, imgAddr)) +invisible(dev.off()) + +pdf(saOutPdf) +plotSA(fit, main="Final model: Mean-variance trend (SA Plot)") +linkName <- "SAPlot.pdf" +linkAddr <- "saplot.pdf" +linkData <- rbind(linkData, c(linkName, linkAddr)) +invisible(dev.off()) + # Save library size info if (wantLibinfo) { efflibsize <- round(y$samples$lib.size * y$samples$norm.factors) @@ -844,11 +858,13 @@ linkData <- rbind(linkData, c(linkName, linkAddr)) invisible(dev.off()) - # Generate Glimma interactive MD plot and table, requires annotation file (assumes gene labels/symbols in 2nd column) - if (haveAnno) { + # Generate Glimma interactive Volcano, MD plot and tables, requires annotation file (assumes gene labels/symbols in 2nd column) + if ("i" %in% plots & haveAnno) { # make gene labels unique to handle NAs geneanno <- y$genes geneanno[, 2] <- make.unique(geneanno[, 2]) + + # MD plot Glimma::glMDPlot(fit, coef=i, counts=y$counts, anno=geneanno, groups=factors[, 1], status=status[, i], sample.cols=col.group, main=paste("MD Plot:", unmake.names(con)), side.main=colnames(y$genes)[2], @@ -856,6 +872,16 @@ linkName <- paste0("Glimma_MDPlot_", con, ".html") linkAddr <- paste0("glimma_", con, "/MD-Plot.html") linkData <- rbind(linkData, c(linkName, linkAddr)) + + # Volcano plot + Glimma::glXYPlot(x=fit$coefficients[, i], y=fit$lods[, i], counts=y$counts, anno=geneanno, groups=factors[, 1], + status=status[, i], sample.cols=col.group, + main=paste("Volcano Plot:", unmake.names(con)), side.main=colnames(y$genes)[2], + xlab="logFC", ylab="logodds", + folder=paste0("glimma_volcano_", unmake.names(con)), launch=FALSE) + linkName <- paste0("Glimma_VolcanoPlot_", con, ".html") + linkAddr <- paste0("glimma_volcano_", con, "/XY-Plot.html") + linkData <- rbind(linkData, c(linkName, linkAddr)) } # Plot Volcano @@ -942,7 +968,7 @@ stripchart(plotData[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols, method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval)) } else { - stripchart(plotData$E[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols, + stripchart(plotData$E[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols, method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval)) } } @@ -990,9 +1016,7 @@ cata("Links to PDF copies of plots are in 'Plots' section below
\n") for (i in 1:nrow(imageData)) { - if (grepl("density|box|mds|mdvol", imageData$Link[i])) { - HtmlImage(imageData$Link[i], imageData$Label[i], width=1000) - } else if (wantWeight) { + if (grepl("density|box|mds|mdvol|wts", imageData$Link[i])) { HtmlImage(imageData$Link[i], imageData$Label[i], width=1000) } else { HtmlImage(imageData$Link[i], imageData$Label[i]) @@ -1021,7 +1045,7 @@ cata("

Plots:

\n") #PDFs for (i in 1:nrow(linkData)) { - if (grepl("density|cpm|boxplot|mds|mdplots|voomplot|saplot", linkData$Link[i])) { + if (grepl("density|cpm|boxplot|mds|mdplots|voom|saplot", linkData$Link[i])) { HtmlLink(linkData$Link[i], linkData$Label[i]) } } @@ -1068,12 +1092,14 @@ } } -cata("

Glimma Interactive Results:

\n") - for (i in 1:nrow(linkData)) { - if (grepl("glimma", linkData$Link[i])) { - HtmlLink(linkData$Link[i], linkData$Label[i]) +if ("i" %in% plots) { + cata("

Glimma Interactive Results:

\n") + for (i in 1:nrow(linkData)) { + if (grepl("glimma", linkData$Link[i])) { + HtmlLink(linkData$Link[i], linkData$Label[i]) + } } - } +} cata("

Alt-click links to download file.

\n") cata("

Click floppy disc icon associated history item to download ") diff -r d5a940112511 -r 3133e833b3ce limma_voom.xml --- a/limma_voom.xml Sun Sep 30 10:51:29 2018 -0400 +++ b/limma_voom.xml Fri Jan 04 04:11:56 2019 -0500 @@ -1,4 +1,4 @@ - + Perform differential expression with limma-voom or limma-trend @@ -120,12 +120,12 @@ mkdir ./output_dir && -#if $anno.annoOpt=='yes': - cp -r ./glimma* '$outReport.files_path' && +cp '$outReport.files_path'/*tsv output_dir/ + +#if 'i' in str($out.plots).split( "," ): + && cp -r ./glimma* '$outReport.files_path' #end if -cp '$outReport.files_path'/*tsv output_dir/ - #if $out.filtCounts or $out.normCounts: && cp '$outReport.files_path'/*counts output_dir/ #end if @@ -212,7 +212,7 @@ + help="If you provide an annotation file, annotations will be added to the table(s) of differential expression results to provide descriptions for each gene, and used to label the top genes in the Volcano plot. Interactive Glimma Volcano and MD plots will also be generated. See Help section below."> @@ -272,7 +272,8 @@

- + + @@ -384,6 +385,7 @@ + @@ -464,7 +466,7 @@ - + @@ -653,7 +655,7 @@ **What it does** -Given a matrix of counts (e.g. from featureCounts) and optional information about the genes, performs differential expression (DE) using the limma_ Bioconductor package and produces plots and tables useful in DE analysis. If an annotation file is provided, interactive Glimma_ plots and a table of differentially expressed genes will also be generated. See an example workflow here_. +Given a matrix of counts (e.g. from featureCounts) and optional information about the genes, this tool performs differential expression (DE) using the limma_ Bioconductor package and produces plots and tables useful in DE analysis. Interactive Glimma_ plots and tables can also be generated and links to the Glimma plots will be provided in the report. See an example workflow here_. In the `limma approach`_ to RNA-seq, read counts are converted to log2-counts-per-million (logCPM) and the mean-variance relationship is modelled either with precision weights or with an empirical Bayes prior trend. The precision weights approach is called “voom” and the prior trend approach is called “limma-trend”. For more information, see the Help section below. @@ -699,7 +701,7 @@ **Gene Annotations:** Optional input for gene annotations, this can contain more information about the genes than just an ID number. The annotations will -be available in the differential expression results table and the optional normalised counts table. They will also be used to generate interactive Glimma_ MD plots and table of differential expression, a link to the Glimma plots will be provided in the report. The input annotation file must contain a header row and have the gene IDs in the first column. The second column will be used to label the genes in the Volcano plot and interactive Glimma plots, additional columns will be available in the Glimma interactive table. The number of rows should match that of the counts files, add NA for any gene IDs with no annotation. The Galaxy tool **annotateMyIDs** can be used to obtain annotations for human, mouse, fly and zebrafish. +be available in the differential expression results table and the optional normalised counts table. They will also be used to generate interactive Glimma_ Volcano, MD plots and tables of differential expression. The input annotation file must contain a header row and have the gene IDs in the first column. The second column will be used to label the genes in the Volcano plot and interactive Glimma plots, additional columns will be available in the Glimma interactive table. The number of rows should match that of the counts files, add NA for any gene IDs with no annotation. The Galaxy tool **annotateMyIDs** can be used to obtain annotations for human, mouse, fly and zebrafish. Example: @@ -715,7 +717,7 @@ ========== ========== =================================================== **Factor Information:** -Enter factor names and groups in the tool form, or provide a tab-separated file that has the samples in the same order as listed in the columns of the counts matrix. The second column should contain the primary factor levels (e.g. WT, Mut) with optional additional columns for any secondary factors. +Enter factor names and groups in the tool form, or provide a tab-separated file that has the names of the samples in the first column and one header row. The sample names must be the same as the names in the columns of the count matrix. The second column should contain the primary factor levels (e.g. WT, Mut) with optional additional columns for any secondary factors. Example: @@ -730,7 +732,7 @@ Mut3 Mut b3 ========== ============ ========= -*Factor Name:* The name of the experimental factor being investigated e.g. Genotype, Treatment. One factor must be entered and spaces must not be used. Optionally, additional factors can be included, these are variables that might influence your experiment e.g. Batch, Gender, Subject. If additional factors are entered, edgeR will fit an additive linear model. +*Factor Name:* The name of the experimental factor being investigated e.g. Genotype, Treatment. One factor must be entered and spaces must not be used. Optionally, additional factors can be included, these are variables that might influence your experiment e.g. Batch, Gender, Subject. If additional factors are entered, an additive linear model will be used. *Groups:* The names of the groups for the factor. These must be entered in the same order as the samples (to which the groups correspond) are listed in the columns of the counts matrix. Spaces must not be used and if entered into the tool form above, the values should be separated by commas. @@ -835,10 +837,11 @@ * a table of differentially expressed genes for each contrast of interest * a HTML report with plots and additional information - * an interactive Glimma MD plot and table (if annotation file provided) Optionally, under **Output Options** you can choose to output + * interactive Glimma plots and tables: MDS plot, and (if annotation file is input) Volcano plot and MD plot (default: Yes) + * additional plots in the report and as PDFs * a normalised counts table * a library size information file * the R script used by this tool @@ -899,7 +902,7 @@ .. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html .. _Glimma: https://bioconductor.org/packages/release/bioc/html/Glimma.html -.. _here: https://f1000research.com/articles/5-1408/v2 +.. _here: https://f1000research.com/articles/5-1408/v3 .. _limma approach: https://www.ncbi.nlm.nih.gov/pubmed/25605792 .. _limma User's Guide: http://bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html diff -r d5a940112511 -r 3133e833b3ce test-data/factorinfo.txt --- a/test-data/factorinfo.txt Sun Sep 30 10:51:29 2018 -0400 +++ b/test-data/factorinfo.txt Fri Jan 04 04:11:56 2019 -0500 @@ -1,7 +1,7 @@ Sample Genotype Batch -Mut1 Mut b1 +WT3 WT b3 Mut2 Mut b2 Mut3 Mut b3 WT1 WT b1 WT2 WT b2 -WT3 WT b3 +Mut1 Mut b1