# HG changeset patch
# User iuc
# Date 1510060694 18000
# Node ID 9bdff28ae1b1c8e2fb07aab7163f0c7762ff24c3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edger commit eac022c9c6e51e661c1513306b9fefdad673487d
diff -r 000000000000 -r 9bdff28ae1b1 edger.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/edger.R Tue Nov 07 08:18:14 2017 -0500
@@ -0,0 +1,718 @@
+# This tool takes in a matrix of feature counts as well as gene annotations and
+# outputs a table of top expressions as well as various plots for differential
+# expression analysis
+#
+# ARGS: htmlPath", "R", 1, "character" -Path to html file linking to other outputs
+# outPath", "o", 1, "character" -Path to folder to write all output to
+# filesPath", "j", 2, "character" -JSON list object if multiple files input
+# matrixPath", "m", 2, "character" -Path to count matrix
+# factFile", "f", 2, "character" -Path to factor information file
+# factInput", "i", 2, "character" -String containing factors if manually input
+# annoPath", "a", 2, "character" -Path to input containing gene annotations
+# contrastData", "C", 1, "character" -String containing contrasts of interest
+# cpmReq", "c", 2, "double" -Float specifying cpm requirement
+# cntReq", "z", 2, "integer" -Integer specifying minimum total count requirement
+# sampleReq", "s", 2, "integer" -Integer specifying cpm requirement
+# normCounts", "x", 0, "logical" -String specifying if normalised counts should be output
+# rdaOpt", "r", 0, "logical" -String specifying if RData should be output
+# lfcReq", "l", 1, "double" -Float specifying the log-fold-change requirement
+# pValReq", "p", 1, "double" -Float specifying the p-value requirement
+# pAdjOpt", "d", 1, "character" -String specifying the p-value adjustment method
+# normOpt", "n", 1, "character" -String specifying type of normalisation used
+# robOpt", "b", 0, "logical" -String specifying if robust options should be used
+# lrtOpt", "t", 0, "logical" -String specifying whether to perform LRT test instead
+#
+# OUT:
+# MDS Plot
+# BCV Plot
+# QL Plot
+# MD Plot
+# Expression Table
+# HTML file linking to the ouputs
+# Optional:
+# Normalised counts Table
+# RData file
+#
+# Author: Shian Su - registertonysu@gmail.com - Jan 2014
+# Modified by: Maria Doyle - Oct 2017 (some code taken from the DESeq2 wrapper)
+
+# Record starting time
+timeStart <- as.character(Sys.time())
+
+# setup R error handling to go to stderr
+options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# Load all required libraries
+library(methods, quietly=TRUE, warn.conflicts=FALSE)
+library(statmod, quietly=TRUE, warn.conflicts=FALSE)
+library(splines, quietly=TRUE, warn.conflicts=FALSE)
+library(edgeR, quietly=TRUE, warn.conflicts=FALSE)
+library(limma, quietly=TRUE, warn.conflicts=FALSE)
+library(scales, quietly=TRUE, warn.conflicts=FALSE)
+library(getopt, quietly=TRUE, warn.conflicts=FALSE)
+
+################################################################################
+### Function Delcaration
+################################################################################
+# Function to sanitise contrast equations so there are no whitespaces
+# surrounding the arithmetic operators, leading or trailing whitespace
+sanitiseEquation <- function(equation) {
+ equation <- gsub(" *[+] *", "+", equation)
+ equation <- gsub(" *[-] *", "-", equation)
+ equation <- gsub(" *[/] *", "/", equation)
+ equation <- gsub(" *[*] *", "*", equation)
+ equation <- gsub("^\\s+|\\s+$", "", equation)
+ return(equation)
+}
+
+# Function to sanitise group information
+sanitiseGroups <- function(string) {
+ string <- gsub(" *[,] *", ",", string)
+ string <- gsub("^\\s+|\\s+$", "", string)
+ return(string)
+}
+
+# Function to change periods to whitespace in a string
+unmake.names <- function(string) {
+ string <- gsub(".", " ", string, fixed=TRUE)
+ return(string)
+}
+
+# Generate output folder and paths
+makeOut <- function(filename) {
+ return(paste0(opt$outPath, "/", filename))
+}
+
+# Generating design information
+pasteListName <- function(string) {
+ return(paste0("factors$", string))
+}
+
+# Create cata function: default path set, default seperator empty and appending
+# true by default (Ripped straight from the cat function with altered argument
+# defaults)
+cata <- function(..., file=opt$htmlPath, sep="", fill=FALSE, labels=NULL,
+ append=TRUE) {
+ if (is.character(file))
+ if (file == "")
+ file <- stdout()
+ else if (substring(file, 1L, 1L) == "|") {
+ file <- pipe(substring(file, 2L), "w")
+ on.exit(close(file))
+ }
+ else {
+ file <- file(file, ifelse(append, "a", "w"))
+ on.exit(close(file))
+ }
+ .Internal(cat(list(...), file, sep, fill, labels, append))
+}
+
+# Function to write code for html head and title
+HtmlHead <- function(title) {
+ cata("
\n")
+ cata("", title, "\n")
+ cata("\n")
+}
+
+# Function to write code for html links
+HtmlLink <- function(address, label=address) {
+ cata("", label, " \n")
+}
+
+# Function to write code for html images
+HtmlImage <- function(source, label=source, height=600, width=600) {
+ cata("\n")
+}
+
+# Function to write code for html list items
+ListItem <- function(...) {
+ cata("
\n")
+cata("Links to PDF copies of plots are in 'Plots' section below. \n")
+
+HtmlImage(imageData$Link[1], imageData$Label[1])
+
+for (i in 2:nrow(imageData)) {
+ HtmlImage(imageData$Link[i], imageData$Label[i])
+}
+
+cata("
Differential Expression Counts:
\n")
+
+cata("
\n")
+cata("
\n")
+TableItem()
+for (i in colnames(sigDiff)) {
+ TableHeadItem(i)
+}
+cata("
\n")
+for (i in 1:nrow(sigDiff)) {
+ cata("
\n")
+ TableHeadItem(unmake.names(row.names(sigDiff)[i]))
+ for (j in 1:ncol(sigDiff)) {
+ TableItem(as.character(sigDiff[i, j]))
+ }
+ cata("
\n")
+}
+cata("
")
+
+cata("
Plots:
\n")
+for (i in 1:nrow(linkData)) {
+ if (grepl(".pdf", linkData$Link[i])) {
+ HtmlLink(linkData$Link[i], linkData$Label[i])
+ }
+}
+
+cata("
Tables:
\n")
+for (i in 1:nrow(linkData)) {
+ if (grepl(".tsv", linkData$Link[i])) {
+ HtmlLink(linkData$Link[i], linkData$Label[i])
+ }
+}
+
+if (wantRda) {
+ cata("
R Data Objects:
\n")
+ for (i in 1:nrow(linkData)) {
+ if (grepl(".RData", linkData$Link[i])) {
+ HtmlLink(linkData$Link[i], linkData$Label[i])
+ }
+ }
+}
+
+cata("
Alt-click links to download file.
\n")
+cata("
Click floppy disc icon associated history item to download ")
+cata("all files.
\n")
+cata("
.tsv files can be viewed in Excel or any spreadsheet program.
\n")
+
+cata("
Additional Information
\n")
+cata("
\n")
+
+if (filtCPM || filtSmpCount || filtTotCount) {
+ if (filtCPM) {
+ tempStr <- paste("Genes without more than", opt$cmpReq,
+ "CPM in at least", opt$sampleReq, "samples are insignificant",
+ "and filtered out.")
+ } else if (filtSmpCount) {
+ tempStr <- paste("Genes without more than", opt$cntReq,
+ "counts in at least", opt$sampleReq, "samples are insignificant",
+ "and filtered out.")
+ } else if (filtTotCount) {
+ tempStr <- paste("Genes without more than", opt$cntReq,
+ "counts, after summing counts for all samples, are insignificant",
+ "and filtered out.")
+ }
+
+ ListItem(tempStr)
+ filterProp <- round(filteredCount/preFilterCount*100, digits=2)
+ tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp,
+ "%) genes were filtered out for low expression.")
+ ListItem(tempStr)
+}
+ListItem(opt$normOpt, " was the method used to normalise library sizes.")
+if (wantLRT) {
+ ListItem("The edgeR likelihood ratio test was used.")
+} else {
+ if (wantRobust) {
+ ListItem("The edgeR quasi-likelihood test was used with robust settings (robust=TRUE with estimateDisp and glmQLFit).")
+ } else {
+ ListItem("The edgeR quasi-likelihood test was used.")
+ }
+}
+if (opt$pAdjOpt!="none") {
+ if (opt$pAdjOpt=="BH" || opt$pAdjOpt=="BY") {
+ tempStr <- paste0("MD-Plot highlighted genes are significant at FDR ",
+ "of ", opt$pValReq," and exhibit log2-fold-change of at ",
+ "least ", opt$lfcReq, ".")
+ ListItem(tempStr)
+ } else if (opt$pAdjOpt=="holm") {
+ tempStr <- paste0("MD-Plot highlighted genes are significant at adjusted ",
+ "p-value of ", opt$pValReq," by the Holm(1979) ",
+ "method, and exhibit log2-fold-change of at least ",
+ opt$lfcReq, ".")
+ ListItem(tempStr)
+ }
+} else {
+ tempStr <- paste0("MD-Plot highlighted genes are significant at p-value ",
+ "of ", opt$pValReq," and exhibit log2-fold-change of at ",
+ "least ", opt$lfcReq, ".")
+ ListItem(tempStr)
+}
+cata("
\n")
+
+cata("
Summary of experimental data:
\n")
+
+cata("
*CHECK THAT SAMPLES ARE ASSOCIATED WITH CORRECT GROUP(S)*
\n")
+
+cata("
\n")
+cata("
\n")
+TableHeadItem("SampleID")
+TableHeadItem(names(factors)[1], " (Primary Factor)")
+
+ if (ncol(factors) > 1) {
+ for (i in names(factors)[2:length(names(factors))]) {
+ TableHeadItem(i)
+ }
+ cata("
\n")
+ }
+
+for (i in 1:nrow(factors)) {
+ cata("
\n")
+ TableHeadItem(row.names(factors)[i])
+ for (j in 1:ncol(factors)) {
+ TableItem(as.character(unmake.names(factors[i, j])))
+ }
+ cata("
\n")
+}
+cata("
")
+
+for (i in 1:nrow(linkData)) {
+ if (grepl("session_info", linkData$Link[i])) {
+ HtmlLink(linkData$Link[i], linkData$Label[i])
+ }
+}
+
+cata("
\n")
+cata("
\n")
+TableItem("Task started at:"); TableItem(timeStart)
+cata("