Mercurial > repos > iuc > edger
changeset 3:d79ed3ec25fe draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edger commit e646be741e315df9332b5206cec1e47c11370ff1
author | iuc |
---|---|
date | Sun, 06 May 2018 13:38:41 -0400 |
parents | a1634a9c2ee1 |
children | 4730985c816f |
files | edger.R edger.xml test-data/anno.txt test-data/out_rscript.txt |
diffstat | 4 files changed, 101 insertions(+), 41 deletions(-) [+] |
line wrap: on
line diff
--- a/edger.R Thu Apr 19 17:26:38 2018 -0400 +++ b/edger.R Sun May 06 13:38:41 2018 -0400 @@ -306,9 +306,13 @@ bcvOutPng <- makeOut("bcvplot.png") qlOutPdf <- makeOut("qlplot.pdf") qlOutPng <- makeOut("qlplot.png") -mdsOutPdf <- makeOut("mdsplot.pdf") -mdsOutPng <- makeOut("mdsplot.png") -mdOutPdf <- character() # Initialise character vector +mdsOutPdf <- character() # Initialise character vector +mdsOutPng <- character() +for (i in 1:ncol(factors)) { + mdsOutPdf[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".pdf")) + mdsOutPng[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".png")) +} +mdOutPdf <- character() mdOutPng <- character() topOut <- character() for (i in 1:length(contrastData)) { @@ -338,7 +342,9 @@ data <- list() data$counts <- counts if (haveAnno) { - data$genes <- geneanno + # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno) + annoord <- geneanno[match(row.names(counts), geneanno[,1]), ] + data$genes <- annoord } else { data$genes <- data.frame(GeneID=row.names(counts)) } @@ -410,17 +416,41 @@ # Plot MDS labels <- names(counts) + +# MDS plot png(mdsOutPng, width=600, height=600) -# Currently only using a single factor -plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot") -imageData[1, ] <- c("MDS Plot", "mdsplot.png") +plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1])) +imgName <- paste0("MDS Plot_", names(factors)[1], ".png") +imgAddr <- paste0("mdsplot_", names(factors)[1], ".png") +imageData[1, ] <- c(imgName, imgAddr) invisible(dev.off()) pdf(mdsOutPdf) -plotMDS(data, labels=labels, cex=0.5) -linkData[1, ] <- c("MDS Plot.pdf", "mdsplot.pdf") +plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1])) +linkName <- paste0("MDS Plot_", names(factors)[1], ".pdf") +linkAddr <- paste0("mdsplot_", names(factors)[1], ".pdf") +linkData[1, ] <- c(linkName, linkAddr) invisible(dev.off()) +# If additional factors create additional MDS plots coloured by factor +if (ncol(factors) > 1) { + for (i in 2:ncol(factors)) { + png(mdsOutPng[i], width=600, height=600) + plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i])) + imgName <- paste0("MDS Plot_", names(factors)[i], ".png") + imgAddr <- paste0("mdsplot_", names(factors)[i], ".png") + imageData <- rbind(imageData, c(imgName, imgAddr)) + invisible(dev.off()) + + pdf(mdsOutPdf[i]) + plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i])) + linkName <- paste0("MDS Plot_", names(factors)[i], ".pdf") + linkAddr <- paste0("mdsplot_", names(factors)[i], ".pdf") + linkData <- rbind(linkData, c(linkName, linkAddr)) + invisible(dev.off()) + } +} + # BCV Plot png(bcvOutPng, width=600, height=600) plotBCV(data, main="BCV Plot") @@ -469,7 +499,7 @@ if (wantNorm) { normalisedCounts <- cpm(data, normalized.lib.sizes=TRUE, log=TRUE) normalisedCounts <- data.frame(data$genes, normalisedCounts) - write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t") + write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE) linkData <- rbind(linkData, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv")) } @@ -492,7 +522,7 @@ # Write top expressions table top <- topTags(res, n=Inf, sort.by="PValue") - write.table(top, file=topOut[i], row.names=FALSE, sep="\t") + write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE) linkName <- paste0("edgeR_", contrastData[i], ".tsv") linkAddr <- paste0("edgeR_", contrastData[i], ".tsv") @@ -502,7 +532,7 @@ pdf(mdOutPdf[i]) limma::plotMD(res, status=status, main=paste("MD Plot:", unmake.names(contrastData[i])), - col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"), + hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1), xlab="Average Expression", ylab="logFC") abline(h=0, col="grey", lty=2) @@ -515,7 +545,7 @@ png(mdOutPng[i], height=600, width=600) limma::plotMD(res, status=status, main=paste("MD Plot:", unmake.names(contrastData[i])), - col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"), + hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1), xlab="Average Expression", ylab="logFC") abline(h=0, col="grey", lty=2) @@ -620,7 +650,7 @@ if (filtCPM || filtSmpCount || filtTotCount) { if (filtCPM) { - tempStr <- paste("Genes without more than", opt$cmpReq, + tempStr <- paste("Genes without more than", opt$cpmReq, "CPM in at least", opt$sampleReq, "samples are insignificant", "and filtered out.") } else if (filtSmpCount) {
--- a/edger.xml Thu Apr 19 17:26:38 2018 -0400 +++ b/edger.xml Sun May 06 13:38:41 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="edger" name="edgeR" version="3.20.7.1"> +<tool id="edger" name="edgeR" version="3.20.7.2"> <description> Perform differential expression of count data </description> @@ -120,12 +120,12 @@ </param> <repeat name="rep_group" title="Group" min="2" default="2"> <param name="groupName" type="text" label="Name" - help="Name of group that the counts files(s) belong to (e.g. WT or Mut). NOTE: Please only use letters, numbers or underscores (case sensitive)."> + help="Name of group that the counts files belong to (e.g. WT or Mut). NOTE: Please only use letters, numbers or underscores (case sensitive)."> <sanitizer> <valid initial="string.letters,string.digits"><add value="_" /></valid> </sanitizer> </param> - <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts file(s)"/> + <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts files"/> </repeat> </repeat> </when> @@ -683,19 +683,19 @@ **Gene Annotations:** Optional input for gene annotations, this can contain more information about the genes than just an ID number. The annotations will -be available in the differential expression results table and the optional normalised counts table. +be available in the differential expression results table and the optional normalised counts table. The file must contain a header row and have the gene IDs in the first column. The number of rows should match that of the counts files, add NA for any gene IDs with no annotation. The Galaxy tool **annotateMyIDs** can be used to obtain annotations for human, mouse, fly and zebrafish. Example: ========== ========== =================================================== **GeneID** **Symbol** **GeneName** ---------- ---------- --------------------------------------------------- - 1287 Pzp pregnancy zone protein - 1298 Aanat arylalkylamine N-acetyltransferase - 1302 Aatk apoptosis-associated tyrosine kinase - 1303 Abca1 ATP-binding cassette, sub-family A (ABC1), member 1 - 1304 Abca4 ATP-binding cassette, sub-family A (ABC1), member 4 - 1305 Abca2 ATP-binding cassette, sub-family A (ABC1), member 2 + 11287 Pzp pregnancy zone protein + 11298 Aanat arylalkylamine N-acetyltransferase + 11302 Aatk apoptosis-associated tyrosine kinase + 11303 Abca1 ATP-binding cassette, sub-family A (ABC1), member 1 + 11304 Abca4 ATP-binding cassette, sub-family A (ABC1), member 4 + 11305 Abca2 ATP-binding cassette, sub-family A (ABC1), member 2 ========== ========== =================================================== **Contrasts of Interest:**
--- a/test-data/anno.txt Thu Apr 19 17:26:38 2018 -0400 +++ b/test-data/anno.txt Sun May 06 13:38:41 2018 -0400 @@ -1,7 +1,7 @@ EntrezID Symbol GeneName Chr Length +11302 Aatk apoptosis-associated tyrosine kinase 11 5743 11287 Pzp pregnancy zone protein 6 4681 -11298 Aanat arylalkylamine N-acetyltransferase 11 1455 -11302 Aatk apoptosis-associated tyrosine kinase 11 5743 11303 Abca1 ATP-binding cassette, sub-family A (ABC1), member 1 4 10260 11304 Abca4 ATP-binding cassette, sub-family A (ABC1), member 4 3 7248 -11305 Abca2 ATP-binding cassette, sub-family A (ABC1), member 2 2 8061 \ No newline at end of file +11305 Abca2 ATP-binding cassette, sub-family A (ABC1), member 2 2 8061 +11298 Aanat arylalkylamine N-acetyltransferase 11 1455
--- a/test-data/out_rscript.txt Thu Apr 19 17:26:38 2018 -0400 +++ b/test-data/out_rscript.txt Sun May 06 13:38:41 2018 -0400 @@ -306,9 +306,13 @@ bcvOutPng <- makeOut("bcvplot.png") qlOutPdf <- makeOut("qlplot.pdf") qlOutPng <- makeOut("qlplot.png") -mdsOutPdf <- makeOut("mdsplot.pdf") -mdsOutPng <- makeOut("mdsplot.png") -mdOutPdf <- character() # Initialise character vector +mdsOutPdf <- character() # Initialise character vector +mdsOutPng <- character() +for (i in 1:ncol(factors)) { + mdsOutPdf[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".pdf")) + mdsOutPng[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".png")) +} +mdOutPdf <- character() mdOutPng <- character() topOut <- character() for (i in 1:length(contrastData)) { @@ -338,7 +342,9 @@ data <- list() data$counts <- counts if (haveAnno) { - data$genes <- geneanno + # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno) + annoord <- geneanno[match(row.names(counts), geneanno[,1]), ] + data$genes <- annoord } else { data$genes <- data.frame(GeneID=row.names(counts)) } @@ -410,17 +416,41 @@ # Plot MDS labels <- names(counts) + +# MDS plot png(mdsOutPng, width=600, height=600) -# Currently only using a single factor -plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main="MDS Plot") -imageData[1, ] <- c("MDS Plot", "mdsplot.png") +plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1])) +imgName <- paste0("MDS Plot_", names(factors)[1], ".png") +imgAddr <- paste0("mdsplot_", names(factors)[1], ".png") +imageData[1, ] <- c(imgName, imgAddr) invisible(dev.off()) pdf(mdsOutPdf) -plotMDS(data, labels=labels, cex=0.5) -linkData[1, ] <- c("MDS Plot.pdf", "mdsplot.pdf") +plotMDS(data, labels=labels, col=as.numeric(factors[, 1]), cex=0.8, main=paste("MDS Plot:", names(factors)[1])) +linkName <- paste0("MDS Plot_", names(factors)[1], ".pdf") +linkAddr <- paste0("mdsplot_", names(factors)[1], ".pdf") +linkData[1, ] <- c(linkName, linkAddr) invisible(dev.off()) +# If additional factors create additional MDS plots coloured by factor +if (ncol(factors) > 1) { + for (i in 2:ncol(factors)) { + png(mdsOutPng[i], width=600, height=600) + plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i])) + imgName <- paste0("MDS Plot_", names(factors)[i], ".png") + imgAddr <- paste0("mdsplot_", names(factors)[i], ".png") + imageData <- rbind(imageData, c(imgName, imgAddr)) + invisible(dev.off()) + + pdf(mdsOutPdf[i]) + plotMDS(data, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i])) + linkName <- paste0("MDS Plot_", names(factors)[i], ".pdf") + linkAddr <- paste0("mdsplot_", names(factors)[i], ".pdf") + linkData <- rbind(linkData, c(linkName, linkAddr)) + invisible(dev.off()) + } +} + # BCV Plot png(bcvOutPng, width=600, height=600) plotBCV(data, main="BCV Plot") @@ -469,7 +499,7 @@ if (wantNorm) { normalisedCounts <- cpm(data, normalized.lib.sizes=TRUE, log=TRUE) normalisedCounts <- data.frame(data$genes, normalisedCounts) - write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t") + write.table (normalisedCounts, file=normOut, row.names=FALSE, sep="\t", quote=FALSE) linkData <- rbind(linkData, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv")) } @@ -492,7 +522,7 @@ # Write top expressions table top <- topTags(res, n=Inf, sort.by="PValue") - write.table(top, file=topOut[i], row.names=FALSE, sep="\t") + write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE) linkName <- paste0("edgeR_", contrastData[i], ".tsv") linkAddr <- paste0("edgeR_", contrastData[i], ".tsv") @@ -502,7 +532,7 @@ pdf(mdOutPdf[i]) limma::plotMD(res, status=status, main=paste("MD Plot:", unmake.names(contrastData[i])), - col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"), + hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1), xlab="Average Expression", ylab="logFC") abline(h=0, col="grey", lty=2) @@ -515,7 +545,7 @@ png(mdOutPng[i], height=600, width=600) limma::plotMD(res, status=status, main=paste("MD Plot:", unmake.names(contrastData[i])), - col=alpha(c("firebrick", "blue"), 0.4), values=c("1", "-1"), + hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1), xlab="Average Expression", ylab="logFC") abline(h=0, col="grey", lty=2) @@ -620,7 +650,7 @@ if (filtCPM || filtSmpCount || filtTotCount) { if (filtCPM) { - tempStr <- paste("Genes without more than", opt$cmpReq, + tempStr <- paste("Genes without more than", opt$cpmReq, "CPM in at least", opt$sampleReq, "samples are insignificant", "and filtered out.") } else if (filtSmpCount) {