changeset 3:d79ed3ec25fe draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edger commit e646be741e315df9332b5206cec1e47c11370ff1
author iuc
date Sun, 06 May 2018 13:38:41 -0400
parents a1634a9c2ee1
children 4730985c816f
files edger.R edger.xml test-data/anno.txt test-data/out_rscript.txt
diffstat 4 files changed, 101 insertions(+), 41 deletions(-) [+]
line wrap: on
line diff
--- a/edger.R	Thu Apr 19 17:26:38 2018 -0400
+++ b/edger.R	Sun May 06 13:38:41 2018 -0400
@@ -306,9 +306,13 @@
 bcvOutPng <- makeOut("bcvplot.png")
 qlOutPdf <- makeOut("qlplot.pdf")
 qlOutPng <- makeOut("qlplot.png")
-mdsOutPdf <- makeOut("mdsplot.pdf")
-mdsOutPng <- makeOut("mdsplot.png")
-mdOutPdf <- character()   # Initialise character vector
+mdsOutPdf <- character()   # Initialise character vector
+mdsOutPng <- character()
+for (i in 1:ncol(factors)) {
+    mdsOutPdf[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".pdf"))
+    mdsOutPng[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".png"))
+}
+mdOutPdf <- character()
 mdOutPng <- character()
 topOut <- character()
 for (i in 1:length(contrastData)) {
@@ -338,7 +342,9 @@
 data <- list()
 data$counts <- counts
 if (haveAnno) {
-    data$genes <- geneanno
+  # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno)
+  annoord <- geneanno[match(row.names(counts), geneanno[,1]), ]
+  data$genes <- annoord
 } else {
     data$genes <- data.frame(GeneID=row.names(counts))
 }
@@ -410,17 +416,41 @@
 
 # Plot MDS
 labels <- names(counts)
+
+# MDS plot
 png(mdsOutPng, width=600, height=600)
-# Currently only using a single factor
-plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot")
-imageData[1, ] <- c("MDS Plot", "mdsplot.png")
+plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1]))
+imgName <- paste0("MDS Plot_", names(factors)[1], ".png")
+imgAddr <- paste0("mdsplot_", names(factors)[1], ".png")
+imageData[1, ] <- c(imgName, imgAddr)
 invisible(dev.off())
 
 pdf(mdsOutPdf)
-plotMDS(data, labels=labels, cex=0.5)
-linkData[1, ] <- c("MDS Plot.pdf", "mdsplot.pdf")
+plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1]))
+linkName <- paste0("MDS Plot_", names(factors)[1], ".pdf")
+linkAddr <- paste0("mdsplot_", names(factors)[1], ".pdf")
+linkData[1, ] <- c(linkName, linkAddr)
 invisible(dev.off())
 
+# If additional factors create additional MDS plots coloured by factor
+if (ncol(factors) > 1) {
+    for (i in 2:ncol(factors)) {
+        png(mdsOutPng[i], width=600, height=600)
+        plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
+        imgName <- paste0("MDS Plot_", names(factors)[i], ".png")
+        imgAddr <- paste0("mdsplot_", names(factors)[i], ".png")
+        imageData <- rbind(imageData, c(imgName, imgAddr))
+        invisible(dev.off())
+
+        pdf(mdsOutPdf[i])
+        plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
+        linkName <- paste0("MDS Plot_", names(factors)[i], ".pdf")
+        linkAddr <- paste0("mdsplot_", names(factors)[i], ".pdf")
+        linkData <- rbind(linkData, c(linkName, linkAddr))
+        invisible(dev.off())
+    }
+}
+
 # BCV Plot
 png(bcvOutPng, width=600, height=600)
 plotBCV(data, main="BCV Plot")
@@ -469,7 +499,7 @@
 if (wantNorm) { 
         normalisedCounts <- cpm(data, normalized.lib.sizes=TRUE, log=TRUE) 
         normalisedCounts <- data.frame(data$genes, normalisedCounts)
-        write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t")
+        write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE)
         linkData <- rbind(linkData, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv"))
 }
 
@@ -492,7 +522,7 @@
                                              
     # Write top expressions table
     top <- topTags(res, n=Inf, sort.by="PValue")
-    write.table(top, file=topOut[i], row.names=FALSE, sep="\t")
+    write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE)
     
     linkName <- paste0("edgeR_", contrastData[i], ".tsv")
     linkAddr <- paste0("edgeR_", contrastData[i], ".tsv")
@@ -502,7 +532,7 @@
     pdf(mdOutPdf[i])
     limma::plotMD(res, status=status,
                                 main=paste("MD Plot:", unmake.names(contrastData[i])), 
-                                col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
+                                hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
                                 xlab="Average Expression", ylab="logFC")
     
     abline(h=0, col="grey", lty=2)
@@ -515,7 +545,7 @@
     png(mdOutPng[i], height=600, width=600)
     limma::plotMD(res, status=status,
                                 main=paste("MD Plot:", unmake.names(contrastData[i])), 
-                                col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
+                                hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
                                 xlab="Average Expression", ylab="logFC")
     
     abline(h=0, col="grey", lty=2)
@@ -620,7 +650,7 @@
 
 if (filtCPM || filtSmpCount || filtTotCount) {
     if (filtCPM) {
-    tempStr <- paste("Genes without more than", opt$cmpReq,
+    tempStr <- paste("Genes without more than", opt$cpmReq,
                                      "CPM in at least", opt$sampleReq, "samples are insignificant",
                                      "and filtered out.")
     } else if (filtSmpCount) {
--- a/edger.xml	Thu Apr 19 17:26:38 2018 -0400
+++ b/edger.xml	Sun May 06 13:38:41 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="edger" name="edgeR" version="3.20.7.1">
+<tool id="edger" name="edgeR" version="3.20.7.2">
     <description>
         Perform differential expression of count data
     </description>
@@ -120,12 +120,12 @@
                     </param>
                     <repeat name="rep_group" title="Group" min="2" default="2">
                         <param name="groupName" type="text" label="Name"
-                        help="Name of group that the counts files(s) belong to (e.g. WT or Mut). NOTE: Please only use letters, numbers or underscores (case sensitive).">
+                        help="Name of group that the counts files belong to (e.g. WT or Mut). NOTE: Please only use letters, numbers or underscores (case sensitive).">
                         <sanitizer>
                             <valid initial="string.letters,string.digits"><add value="_" /></valid>
                         </sanitizer>
                         </param>
-                        <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts file(s)"/>
+                        <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts files"/>
                     </repeat>
                 </repeat>
             </when>
@@ -683,19 +683,19 @@
 **Gene Annotations:**
 Optional input for gene annotations, this can contain more
 information about the genes than just an ID number. The annotations will
-be available in the differential expression results table and the optional normalised counts table.
+be available in the differential expression results table and the optional normalised counts table. The file must contain a header row and have the gene IDs in the first column. The number of rows should match that of the counts files, add NA for any gene IDs with no annotation. The Galaxy tool **annotateMyIDs** can be used to obtain annotations for human, mouse, fly and zebrafish.
 
 Example:
 
     ==========  ==========  ===================================================
     **GeneID**  **Symbol**  **GeneName**
     ----------  ----------  ---------------------------------------------------
-    1287        Pzp         pregnancy zone protein
-    1298        Aanat       arylalkylamine N-acetyltransferase
-    1302        Aatk        apoptosis-associated tyrosine kinase
-    1303        Abca1       ATP-binding cassette, sub-family A (ABC1), member 1
-    1304        Abca4       ATP-binding cassette, sub-family A (ABC1), member 4
-    1305        Abca2       ATP-binding cassette, sub-family A (ABC1), member 2
+    11287        Pzp         pregnancy zone protein
+    11298        Aanat       arylalkylamine N-acetyltransferase
+    11302        Aatk        apoptosis-associated tyrosine kinase
+    11303        Abca1       ATP-binding cassette, sub-family A (ABC1), member 1
+    11304        Abca4       ATP-binding cassette, sub-family A (ABC1), member 4
+    11305        Abca2       ATP-binding cassette, sub-family A (ABC1), member 2
     ==========  ==========  ===================================================
 
 **Contrasts of Interest:**
--- a/test-data/anno.txt	Thu Apr 19 17:26:38 2018 -0400
+++ b/test-data/anno.txt	Sun May 06 13:38:41 2018 -0400
@@ -1,7 +1,7 @@
 EntrezID	Symbol	GeneName	Chr	Length
+11302	Aatk	apoptosis-associated tyrosine kinase	11	5743
 11287	Pzp	pregnancy zone protein	6	4681
-11298	Aanat	arylalkylamine N-acetyltransferase	11	1455
-11302	Aatk	apoptosis-associated tyrosine kinase	11	5743
 11303	Abca1	ATP-binding cassette, sub-family A (ABC1), member 1	4	10260
 11304	Abca4	ATP-binding cassette, sub-family A (ABC1), member 4	3	7248
-11305	Abca2	ATP-binding cassette, sub-family A (ABC1), member 2	2	8061
\ No newline at end of file
+11305	Abca2	ATP-binding cassette, sub-family A (ABC1), member 2	2	8061
+11298	Aanat	arylalkylamine N-acetyltransferase	11	1455
--- a/test-data/out_rscript.txt	Thu Apr 19 17:26:38 2018 -0400
+++ b/test-data/out_rscript.txt	Sun May 06 13:38:41 2018 -0400
@@ -306,9 +306,13 @@
 bcvOutPng <- makeOut("bcvplot.png")
 qlOutPdf <- makeOut("qlplot.pdf")
 qlOutPng <- makeOut("qlplot.png")
-mdsOutPdf <- makeOut("mdsplot.pdf")
-mdsOutPng <- makeOut("mdsplot.png")
-mdOutPdf <- character()   # Initialise character vector
+mdsOutPdf <- character()   # Initialise character vector
+mdsOutPng <- character()
+for (i in 1:ncol(factors)) {
+    mdsOutPdf[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".pdf"))
+    mdsOutPng[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".png"))
+}
+mdOutPdf <- character()
 mdOutPng <- character()
 topOut <- character()
 for (i in 1:length(contrastData)) {
@@ -338,7 +342,9 @@
 data <- list()
 data$counts <- counts
 if (haveAnno) {
-    data$genes <- geneanno
+  # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno)
+  annoord <- geneanno[match(row.names(counts), geneanno[,1]), ]
+  data$genes <- annoord
 } else {
     data$genes <- data.frame(GeneID=row.names(counts))
 }
@@ -410,17 +416,41 @@
 
 # Plot MDS
 labels <- names(counts)
+
+# MDS plot
 png(mdsOutPng, width=600, height=600)
-# Currently only using a single factor
-plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot")
-imageData[1, ] <- c("MDS Plot", "mdsplot.png")
+plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1]))
+imgName <- paste0("MDS Plot_", names(factors)[1], ".png")
+imgAddr <- paste0("mdsplot_", names(factors)[1], ".png")
+imageData[1, ] <- c(imgName, imgAddr)
 invisible(dev.off())
 
 pdf(mdsOutPdf)
-plotMDS(data, labels=labels, cex=0.5)
-linkData[1, ] <- c("MDS Plot.pdf", "mdsplot.pdf")
+plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1]))
+linkName <- paste0("MDS Plot_", names(factors)[1], ".pdf")
+linkAddr <- paste0("mdsplot_", names(factors)[1], ".pdf")
+linkData[1, ] <- c(linkName, linkAddr)
 invisible(dev.off())
 
+# If additional factors create additional MDS plots coloured by factor
+if (ncol(factors) > 1) {
+    for (i in 2:ncol(factors)) {
+        png(mdsOutPng[i], width=600, height=600)
+        plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
+        imgName <- paste0("MDS Plot_", names(factors)[i], ".png")
+        imgAddr <- paste0("mdsplot_", names(factors)[i], ".png")
+        imageData <- rbind(imageData, c(imgName, imgAddr))
+        invisible(dev.off())
+
+        pdf(mdsOutPdf[i])
+        plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
+        linkName <- paste0("MDS Plot_", names(factors)[i], ".pdf")
+        linkAddr <- paste0("mdsplot_", names(factors)[i], ".pdf")
+        linkData <- rbind(linkData, c(linkName, linkAddr))
+        invisible(dev.off())
+    }
+}
+
 # BCV Plot
 png(bcvOutPng, width=600, height=600)
 plotBCV(data, main="BCV Plot")
@@ -469,7 +499,7 @@
 if (wantNorm) { 
         normalisedCounts <- cpm(data, normalized.lib.sizes=TRUE, log=TRUE) 
         normalisedCounts <- data.frame(data$genes, normalisedCounts)
-        write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t")
+        write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE)
         linkData <- rbind(linkData, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv"))
 }
 
@@ -492,7 +522,7 @@
                                              
     # Write top expressions table
     top <- topTags(res, n=Inf, sort.by="PValue")
-    write.table(top, file=topOut[i], row.names=FALSE, sep="\t")
+    write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE)
     
     linkName <- paste0("edgeR_", contrastData[i], ".tsv")
     linkAddr <- paste0("edgeR_", contrastData[i], ".tsv")
@@ -502,7 +532,7 @@
     pdf(mdOutPdf[i])
     limma::plotMD(res, status=status,
                                 main=paste("MD Plot:", unmake.names(contrastData[i])), 
-                                col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
+                                hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
                                 xlab="Average Expression", ylab="logFC")
     
     abline(h=0, col="grey", lty=2)
@@ -515,7 +545,7 @@
     png(mdOutPng[i], height=600, width=600)
     limma::plotMD(res, status=status,
                                 main=paste("MD Plot:", unmake.names(contrastData[i])), 
-                                col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"),
+                                hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
                                 xlab="Average Expression", ylab="logFC")
     
     abline(h=0, col="grey", lty=2)
@@ -620,7 +650,7 @@
 
 if (filtCPM || filtSmpCount || filtTotCount) {
     if (filtCPM) {
-    tempStr <- paste("Genes without more than", opt$cmpReq,
+    tempStr <- paste("Genes without more than", opt$cpmReq,
                                      "CPM in at least", opt$sampleReq, "samples are insignificant",
                                      "and filtered out.")
     } else if (filtSmpCount) {