changeset 16:ae2aad0a6d50 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edger commit 4b17306763bc8eb92c8967c7b4b57abcc514e670
author iuc
date Wed, 04 Sep 2024 15:50:01 +0000
parents 5bf899c13979
children
files edger.R edger.xml
diffstat 2 files changed, 354 insertions(+), 356 deletions(-) [+]
line wrap: on
line diff
--- a/edger.R	Wed Nov 22 03:57:37 2023 +0000
+++ b/edger.R	Wed Sep 04 15:50:01 2024 +0000
@@ -42,8 +42,8 @@
 
 # setup R error handling to go to stderr
 options(show.error.messages = FALSE, error = function() {
-  cat(geterrmessage(), file = stderr())
-  q("no", 1, FALSE)
+    cat(geterrmessage(), file = stderr())
+    q("no", 1, FALSE)
 })
 
 # we need that to not crash galaxy with an UTF8 error on German LC settings.
@@ -64,41 +64,41 @@
 # Function to sanitise contrast equations so there are no whitespaces
 # surrounding the arithmetic operators, leading or trailing whitespace
 sanitise_equation <- function(equation) {
-  equation <- gsub(" *[+] *", "+", equation)
-  equation <- gsub(" *[-] *", "-", equation)
-  equation <- gsub(" *[/] *", "/", equation)
-  equation <- gsub(" *[*] *", "*", equation)
-  equation <- gsub("^\\s+|\\s+$", "", equation)
-  return(equation)
+    equation <- gsub(" *[+] *", "+", equation)
+    equation <- gsub(" *[-] *", "-", equation)
+    equation <- gsub(" *[/] *", "/", equation)
+    equation <- gsub(" *[*] *", "*", equation)
+    equation <- gsub("^\\s+|\\s+$", "", equation)
+    return(equation)
 }
 
 # Function to sanitise group information
 sanitise_groups <- function(string) {
-  string <- gsub(" *[,] *", ",", string)
-  string <- gsub("^\\s+|\\s+$", "", string)
-  return(string)
+    string <- gsub(" *[,] *", ",", string)
+    string <- gsub("^\\s+|\\s+$", "", string)
+    return(string)
 }
 
 # Function to change periods to whitespace in a string
 unmake_names <- function(string) {
-  string <- gsub(".", " ", string, fixed = TRUE)
-  return(string)
+    string <- gsub(".", " ", string, fixed = TRUE)
+    return(string)
 }
 
 # Sanitise file base names coming from factors or contrasts
 sanitise_basename <- function(string) {
-  string <- gsub("[/^]", "_", string)
-  return(string)
+    string <- gsub("[/^]", "_", string)
+    return(string)
 }
 
 # Generate output folder and paths
 make_out <- function(filename) {
-  return(paste0(out_path, "/", filename))
+    return(paste0(out_path, "/", filename))
 }
 
 # Generating design information
 paste_listname <- function(string) {
-  return(paste0("factors$", string))
+    return(paste0("factors$", string))
 }
 
 # Create cata function: default path set, default seperator empty and appending
@@ -106,49 +106,49 @@
 # defaults)
 cata <- function(..., file = opt$htmlPath, sep = "", fill = FALSE, labels = NULL,
                  append = TRUE) {
-  if (is.character(file)) {
-    if (file == "") {
-      file <- stdout()
-    } else if (substring(file, 1L, 1L) == "|") {
-      file <- pipe(substring(file, 2L), "w")
-      on.exit(close(file))
-    } else {
-      file <- file(file, ifelse(append, "a", "w"))
-      on.exit(close(file))
+    if (is.character(file)) {
+        if (file == "") {
+            file <- stdout()
+        } else if (substring(file, 1L, 1L) == "|") {
+            file <- pipe(substring(file, 2L), "w")
+            on.exit(close(file))
+        } else {
+            file <- file(file, ifelse(append, "a", "w"))
+            on.exit(close(file))
+        }
     }
-  }
-  .Internal(cat(list(...), file, sep, fill, labels, append))
+    .Internal(cat(list(...), file, sep, fill, labels, append))
 }
 
 # Function to write code for html head and title
 html_head <- function(title) {
-  cata("<head>\n")
-  cata("<title>", title, "</title>\n")
-  cata("</head>\n")
+    cata("<head>\n")
+    cata("<title>", title, "</title>\n")
+    cata("</head>\n")
 }
 
 # Function to write code for html links
 html_link <- function(address, label = address) {
-  cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n")
+    cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n")
 }
 
 # Function to write code for html images
 html_image <- function(source, label = source, height = 600, width = 600) {
-  cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height)
-  cata("\" width=\"", width, "\"/>\n")
+    cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height)
+    cata("\" width=\"", width, "\"/>\n")
 }
 
 # Function to write code for html list items
 list_item <- function(...) {
-  cata("<li>", ..., "</li>\n")
+    cata("<li>", ..., "</li>\n")
 }
 
 table_item <- function(...) {
-  cata("<td>", ..., "</td>\n")
+    cata("<td>", ..., "</td>\n")
 }
 
 table_head_item <- function(...) {
-  cata("<th>", ..., "</th>\n")
+    cata("<th>", ..., "</th>\n")
 }
 
 ################################################################################
@@ -160,160 +160,161 @@
 
 # Get options, using the spec as defined by the enclosed list.
 # Read the options from the default: commandArgs(TRUE).
-spec <- matrix(c(
-  "htmlPath", "R", 1, "character",
-  "outPath", "o", 1, "character",
-  "filesPath", "j", 2, "character",
-  "matrixPath", "m", 2, "character",
-  "factFile", "f", 2, "character",
-  "formula", "F", 2, "character",
-  "factInput", "i", 2, "character",
-  "annoPath", "a", 2, "character",
-  "contrastData", "C", 1, "character",
-  "cpmReq", "c", 1, "double",
-  "totReq", "y", 0, "logical",
-  "cntReq", "z", 1, "integer",
-  "sampleReq", "s", 1, "integer",
-  "normCounts", "x", 0, "logical",
-  "rdaOpt", "r", 0, "logical",
-  "lfcReq", "l", 1, "double",
-  "pValReq", "p", 1, "double",
-  "pAdjOpt", "d", 1, "character",
-  "normOpt", "n", 1, "character",
-  "robOpt", "b", 0, "logical",
-  "lrtOpt", "t", 0, "logical"
-),
-byrow = TRUE, ncol = 4
+spec <- matrix(
+    c(
+        "htmlPath", "R", 1, "character",
+        "outPath", "o", 1, "character",
+        "filesPath", "j", 2, "character",
+        "matrixPath", "m", 2, "character",
+        "factFile", "f", 2, "character",
+        "formula", "F", 2, "character",
+        "factInput", "i", 2, "character",
+        "annoPath", "a", 2, "character",
+        "contrastData", "C", 1, "character",
+        "cpmReq", "c", 1, "double",
+        "totReq", "y", 0, "logical",
+        "cntReq", "z", 1, "integer",
+        "sampleReq", "s", 1, "integer",
+        "normCounts", "x", 0, "logical",
+        "rdaOpt", "r", 0, "logical",
+        "lfcReq", "l", 1, "double",
+        "pValReq", "p", 1, "double",
+        "pAdjOpt", "d", 1, "character",
+        "normOpt", "n", 1, "character",
+        "robOpt", "b", 0, "logical",
+        "lrtOpt", "t", 0, "logical"
+    ),
+    byrow = TRUE, ncol = 4
 )
 opt <- getopt(spec)
 
 
 if (is.null(opt$matrixPath) && is.null(opt$filesPath)) {
-  cat("A counts matrix (or a set of counts files) is required.\n")
-  q(status = 1)
+    cat("A counts matrix (or a set of counts files) is required.\n")
+    q(status = 1)
 }
 
 if (is.null(opt$cpmReq)) {
-  filt_cpm <- FALSE
+    filt_cpm <- FALSE
 } else {
-  filt_cpm <- TRUE
+    filt_cpm <- TRUE
 }
 
 if (is.null(opt$cntReq) || is.null(opt$sampleReq)) {
-  filt_smpcount <- FALSE
+    filt_smpcount <- FALSE
 } else {
-  filt_smpcount <- TRUE
+    filt_smpcount <- TRUE
 }
 
 if (is.null(opt$totReq)) {
-  filt_totcount <- FALSE
+    filt_totcount <- FALSE
 } else {
-  filt_totcount <- TRUE
+    filt_totcount <- TRUE
 }
 
 if (is.null(opt$lrtOpt)) {
-  want_lrt <- FALSE
+    want_lrt <- FALSE
 } else {
-  want_lrt <- TRUE
+    want_lrt <- TRUE
 }
 
 if (is.null(opt$rdaOpt)) {
-  want_rda <- FALSE
+    want_rda <- FALSE
 } else {
-  want_rda <- TRUE
+    want_rda <- TRUE
 }
 
 if (is.null(opt$annoPath)) {
-  have_anno <- FALSE
+    have_anno <- FALSE
 } else {
-  have_anno <- TRUE
+    have_anno <- TRUE
 }
 
 if (is.null(opt$normCounts)) {
-  want_norm <- FALSE
+    want_norm <- FALSE
 } else {
-  want_norm <- TRUE
+    want_norm <- TRUE
 }
 
 if (is.null(opt$robOpt)) {
-  want_robust <- FALSE
+    want_robust <- FALSE
 } else {
-  want_robust <- TRUE
+    want_robust <- TRUE
 }
 
 
 if (!is.null(opt$filesPath)) {
-  # Process the separate count files (adapted from DESeq2 wrapper)
-  library("rjson")
-  parser <- newJSONParser()
-  parser$addData(opt$filesPath)
-  factor_list <- parser$getObject()
-  factors <- sapply(factor_list, function(x) x[[1]])
-  filenames_in <- unname(unlist(factor_list[[1]][[2]]))
-  sampletable <- data.frame(
-    sample = basename(filenames_in),
-    filename = filenames_in,
-    row.names = filenames_in,
-    stringsAsFactors = FALSE
-  )
-  for (factor in factor_list) {
-    factorname  <- factor[[1]]
-    sampletable[[factorname]] <- character(nrow(sampletable))
-    lvls <- sapply(factor[[2]], function(x) names(x))
-    for (i in seq_along(factor[[2]])) {
-      files <- factor[[2]][[i]][[1]]
-      sampletable[files, factorname] <- lvls[i]
+    # Process the separate count files (adapted from DESeq2 wrapper)
+    library("rjson")
+    parser <- newJSONParser()
+    parser$addData(opt$filesPath)
+    factor_list <- parser$getObject()
+    factors <- sapply(factor_list, function(x) x[[1]])
+    filenames_in <- unname(unlist(factor_list[[1]][[2]]))
+    sampletable <- data.frame(
+        sample = basename(filenames_in),
+        filename = filenames_in,
+        row.names = filenames_in,
+        stringsAsFactors = FALSE
+    )
+    for (factor in factor_list) {
+        factorname <- factor[[1]]
+        sampletable[[factorname]] <- character(nrow(sampletable))
+        lvls <- sapply(factor[[2]], function(x) names(x))
+        for (i in seq_along(factor[[2]])) {
+            files <- factor[[2]][[i]][[1]]
+            sampletable[files, factorname] <- lvls[i]
+        }
+        sampletable[[factorname]] <- factor(sampletable[[factorname]], levels = lvls)
     }
-    sampletable[[factorname]] <- factor(sampletable[[factorname]], levels = lvls)
-  }
-  rownames(sampletable) <- sampletable$sample
-  rem <- c("sample", "filename")
-  factors <- sampletable[, !(names(sampletable) %in% rem), drop = FALSE]
+    rownames(sampletable) <- sampletable$sample
+    rem <- c("sample", "filename")
+    factors <- sampletable[, !(names(sampletable) %in% rem), drop = FALSE]
 
-  # read in count files and create single table
-  countfiles <- lapply(sampletable$filename, function(x) {
-    read.delim(x, row.names = 1)
-  })
-  counts <- do.call("cbind", countfiles)
+    # read in count files and create single table
+    countfiles <- lapply(sampletable$filename, function(x) {
+        read.delim(x, row.names = 1)
+    })
+    counts <- do.call("cbind", countfiles)
 } else {
-  # Process the single count matrix
-  counts <- read.table(opt$matrixPath, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = FALSE)
-  row.names(counts) <- counts[, 1]
-  counts <- counts[, -1]
-  countsrows <- nrow(counts)
+    # Process the single count matrix
+    counts <- read.table(opt$matrixPath, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = FALSE)
+    row.names(counts) <- counts[, 1]
+    counts <- counts[, -1]
+    countsrows <- nrow(counts)
 
-  # Process factors
-  if (is.null(opt$factInput)) {
-    factordata <- read.table(opt$factFile, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = TRUE)
-    # check samples names match
-    if (!any(factordata[, 1] %in% colnames(counts))) {
-      stop("Sample IDs in factors file and count matrix don't match")
+    # Process factors
+    if (is.null(opt$factInput)) {
+        factordata <- read.table(opt$factFile, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = TRUE)
+        # check samples names match
+        if (!any(factordata[, 1] %in% colnames(counts))) {
+            stop("Sample IDs in factors file and count matrix don't match")
+        }
+        # order samples as in counts matrix
+        factordata <- factordata[match(colnames(counts), factordata[, 1]), ]
+        factors <- data.frame(sapply(factordata[, -1, drop = FALSE], make.names))
+    } else {
+        factors <- unlist(strsplit(opt$factInput, "|", fixed = TRUE))
+        factordata <- list()
+        for (fact in factors) {
+            newfact <- unlist(strsplit(fact, split = "::"))
+            factordata <- rbind(factordata, newfact)
+        } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
+
+        # Set the row names to be the name of the factor and delete first row
+        row.names(factordata) <- factordata[, 1]
+        factordata <- factordata[, -1]
+        factordata <- sapply(factordata, sanitise_groups)
+        factordata <- sapply(factordata, strsplit, split = ",")
+        factordata <- sapply(factordata, make.names)
+        # Transform factor data into data frame of R factor objects
+        factors <- data.frame(factordata)
     }
-    # order samples as in counts matrix
-    factordata <- factordata[match(colnames(counts), factordata[, 1]), ]
-    factors <- data.frame(sapply(factordata[, -1, drop = FALSE], make.names))
-  } else {
-    factors <- unlist(strsplit(opt$factInput, "|", fixed = TRUE))
-    factordata <- list()
-    for (fact in factors) {
-      newfact <- unlist(strsplit(fact, split = "::"))
-      factordata <- rbind(factordata, newfact)
-    } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
-
-    # Set the row names to be the name of the factor and delete first row
-    row.names(factordata) <- factordata[, 1]
-    factordata <- factordata[, -1]
-    factordata <- sapply(factordata, sanitise_groups)
-    factordata <- sapply(factordata, strsplit, split = ",")
-    factordata <- sapply(factordata, make.names)
-    # Transform factor data into data frame of R factor objects
-    factors <- data.frame(factordata)
-  }
 }
 
 # if annotation file provided
 if (have_anno) {
-  geneanno <- read.table(opt$annoPath, header = TRUE, sep = "\t", quote = "", strip.white = TRUE, stringsAsFactors = FALSE)
+    geneanno <- read.table(opt$annoPath, header = TRUE, sep = "\t", quote = "", strip.white = TRUE, stringsAsFactors = FALSE)
 }
 
 # Create output directory
@@ -322,10 +323,10 @@
 
 # Check if contrastData is a file or not
 if (file.exists(opt$contrastData)) {
-  contrast_data <- unlist(read.table(opt$contrastData, sep = "\t", header = TRUE)[[1]])
+    contrast_data <- unlist(read.table(opt$contrastData, sep = "\t", header = TRUE)[[1]])
 } else {
-  # Split up contrasts separated by comma into a vector then sanitise
-  contrast_data <- unlist(strsplit(opt$contrastData, split = ","))
+    # Split up contrasts separated by comma into a vector then sanitise
+    contrast_data <- unlist(strsplit(opt$contrastData, split = ","))
 }
 contrast_data <- sanitise_equation(contrast_data)
 contrast_data <- gsub(" ", ".", contrast_data, fixed = TRUE)
@@ -337,16 +338,16 @@
 mds_pdf <- character() # Initialise character vector
 mds_png <- character()
 for (i in seq_len(ncol(factors))) {
-  mds_pdf[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf"))
-  mds_png[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png"))
+    mds_pdf[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf"))
+    mds_png[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png"))
 }
 md_pdf <- character()
 md_png <- character()
 top_out <- character()
 for (i in seq_along(contrast_data)) {
-  md_pdf[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf"))
-  md_png[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png"))
-  top_out[i] <- make_out(paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv"))
+    md_pdf[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf"))
+    md_png[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png"))
+    top_out[i] <- make_out(paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv"))
 } # Save output paths for each contrast as vectors
 norm_out <- make_out("edgeR_normcounts.tsv")
 rda_out <- make_out("edgeR_analysis.RData")
@@ -370,11 +371,11 @@
 data <- list()
 data$counts <- counts
 if (have_anno) {
-  # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno)
-  annoord <- geneanno[match(row.names(counts), geneanno[, 1]), ]
-  data$genes <- annoord
+    # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno)
+    annoord <- geneanno[match(row.names(counts), geneanno[, 1]), ]
+    data$genes <- annoord
 } else {
-  data$genes <- data.frame(GeneID = row.names(counts))
+    data$genes <- data.frame(GeneID = row.names(counts))
 }
 
 # If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of
@@ -382,16 +383,16 @@
 prefilter_count <- nrow(data$counts)
 
 if (filt_cpm || filt_smpcount || filt_totcount) {
-  if (filt_totcount) {
-    keep <- rowSums(data$counts) >= opt$cntReq
-  } else if (filt_smpcount) {
-    keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
-  } else if (filt_cpm) {
-    keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
-  }
+    if (filt_totcount) {
+        keep <- rowSums(data$counts) >= opt$cntReq
+    } else if (filt_smpcount) {
+        keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
+    } else if (filt_cpm) {
+        keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
+    }
 
-  data$counts <- data$counts[keep, ]
-  data$genes <- data$genes[keep, , drop = FALSE]
+    data$counts <- data$counts[keep, ]
+    data$genes <- data$genes[keep, , drop = FALSE]
 }
 
 postfilter_count <- nrow(data$counts)
@@ -411,32 +412,32 @@
 
 
 if (!is.null(opt$formula)) {
-  formula <- opt$formula
-  # sanitisation can be getting rid of the "~"
-  if (!startsWith(formula, "~")) {
-    formula <- paste0("~", formula)
-  }
+    formula <- opt$formula
+    # sanitisation can be getting rid of the "~"
+    if (!startsWith(formula, "~")) {
+        formula <- paste0("~", formula)
+    }
 } else {
-  formula <- "~0"
-  for (i in seq_along(factor_list)) {
-    formula <- paste(formula, factor_list[i], sep = "+")
-  }
+    formula <- "~0"
+    for (i in seq_along(factor_list)) {
+        formula <- paste(formula, factor_list[i], sep = "+")
+    }
 }
 
 formula <- formula(formula)
 design <- model.matrix(formula, factors)
 
 for (i in seq_along(factor_list)) {
-  colnames(design) <- gsub(factor_list[i], "", colnames(design), fixed = TRUE)
+    colnames(design) <- gsub(factor_list[i], "", colnames(design), fixed = TRUE)
 }
 
 # Calculating normalising factor, estimating dispersion
 data <- calcNormFactors(data, method = opt$normOpt)
 
 if (want_robust) {
-  data <- estimateDisp(data, design = design, robust = TRUE)
+    data <- estimateDisp(data, design = design, robust = TRUE)
 } else {
-  data <- estimateDisp(data, design = design)
+    data <- estimateDisp(data, design = design)
 }
 
 # Generate contrasts information
@@ -466,21 +467,21 @@
 
 # If additional factors create additional MDS plots coloured by factor
 if (ncol(factors) > 1) {
-  for (i in 2:ncol(factors)) {
-    png(mds_png[i], width = 600, height = 600)
-    plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
-    img_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".png")
-    img_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png")
-    image_data <- rbind(image_data, c(img_name, img_addr))
-    invisible(dev.off())
+    for (i in 2:ncol(factors)) {
+        png(mds_png[i], width = 600, height = 600)
+        plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
+        img_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".png")
+        img_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png")
+        image_data <- rbind(image_data, c(img_name, img_addr))
+        invisible(dev.off())
 
-    pdf(mds_pdf[i])
-    plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
-    link_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".pdf")
-    link_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf")
-    link_data <- rbind(link_data, c(link_name, link_addr))
-    invisible(dev.off())
-  }
+        pdf(mds_pdf[i])
+        plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
+        link_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".pdf")
+        link_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf")
+        link_data <- rbind(link_data, c(link_name, link_addr))
+        invisible(dev.off())
+    }
 }
 
 # BCV Plot
@@ -500,111 +501,111 @@
 
 # Generate fit
 if (want_lrt) {
-  fit <- glmFit(data, design)
+    fit <- glmFit(data, design)
 } else {
-  if (want_robust) {
-    fit <- glmQLFit(data, design, robust = TRUE)
-  } else {
-    fit <- glmQLFit(data, design)
-  }
+    if (want_robust) {
+        fit <- glmQLFit(data, design, robust = TRUE)
+    } else {
+        fit <- glmQLFit(data, design)
+    }
 
-  # Plot QL dispersions
-  png(ql_png, width = 600, height = 600)
-  plotQLDisp(fit, main = "QL Plot")
-  img_name <- "QL Plot"
-  img_addr <- "qlplot.png"
-  image_data <- rbind(image_data, c(img_name, img_addr))
-  invisible(dev.off())
+    # Plot QL dispersions
+    png(ql_png, width = 600, height = 600)
+    plotQLDisp(fit, main = "QL Plot")
+    img_name <- "QL Plot"
+    img_addr <- "qlplot.png"
+    image_data <- rbind(image_data, c(img_name, img_addr))
+    invisible(dev.off())
 
-  pdf(ql_pdf)
-  plotQLDisp(fit, main = "QL Plot")
-  link_name <- "QL Plot.pdf"
-  link_addr <- "qlplot.pdf"
-  link_data <- rbind(link_data, c(link_name, link_addr))
-  invisible(dev.off())
+    pdf(ql_pdf)
+    plotQLDisp(fit, main = "QL Plot")
+    link_name <- "QL Plot.pdf"
+    link_addr <- "qlplot.pdf"
+    link_data <- rbind(link_data, c(link_name, link_addr))
+    invisible(dev.off())
 }
 
 # Save normalised counts (log2cpm)
 if (want_norm) {
-  normalised_counts <- cpm(data, normalized.lib.sizes = TRUE, log = TRUE)
-  normalised_counts <- data.frame(data$genes, normalised_counts)
-  write.table(normalised_counts, file = norm_out, row.names = FALSE, sep = "\t", quote = FALSE)
-  link_data <- rbind(link_data, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv"))
+    normalised_counts <- cpm(data, normalized.lib.sizes = TRUE, log = TRUE)
+    normalised_counts <- data.frame(data$genes, normalised_counts)
+    write.table(normalised_counts, file = norm_out, row.names = FALSE, sep = "\t", quote = FALSE)
+    link_data <- rbind(link_data, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv"))
 }
 
 
 for (i in seq_along(contrast_data)) {
-  if (want_lrt) {
-    res <- glmLRT(fit, contrast = contrasts[, i])
-  } else {
-    res <- glmQLFTest(fit, contrast = contrasts[, i])
-  }
+    if (want_lrt) {
+        res <- glmLRT(fit, contrast = contrasts[, i])
+    } else {
+        res <- glmQLFTest(fit, contrast = contrasts[, i])
+    }
 
-  status <- decideTestsDGE(res,
-    adjust.method = opt$pAdjOpt, p.value = opt$pValReq,
-    lfc = opt$lfcReq
-  )
-  sum_status <- summary(status)
+    status <- decideTestsDGE(res,
+        adjust.method = opt$pAdjOpt, p.value = opt$pValReq,
+        lfc = opt$lfcReq
+    )
+    sum_status <- summary(status)
 
-  # Collect counts for differential expression
-  up_count[i] <- sum_status["Up", ]
-  down_count[i] <- sum_status["Down", ]
-  flat_count[i] <- sum_status["NotSig", ]
+    # Collect counts for differential expression
+    up_count[i] <- sum_status["Up", ]
+    down_count[i] <- sum_status["Down", ]
+    flat_count[i] <- sum_status["NotSig", ]
 
-  # Write top expressions table
-  top <- topTags(res, adjust.method = opt$pAdjOpt, n = Inf, sort.by = "PValue")
-  write.table(top, file = top_out[i], row.names = FALSE, sep = "\t", quote = FALSE)
+    # Write top expressions table
+    top <- topTags(res, adjust.method = opt$pAdjOpt, n = Inf, sort.by = "PValue")
+    write.table(top, file = top_out[i], row.names = FALSE, sep = "\t", quote = FALSE)
 
-  link_name <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv")
-  link_addr <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv")
-  link_data <- rbind(link_data, c(link_name, link_addr))
+    link_name <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv")
+    link_addr <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv")
+    link_data <- rbind(link_data, c(link_name, link_addr))
 
-  # Plot MD (log ratios vs mean difference) using limma package
-  pdf(md_pdf[i])
-  limma::plotMD(res,
-    status = status,
-    main = paste("MD Plot:", unmake_names(contrast_data[i])),
-    hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
-    xlab = "Average Expression", ylab = "logFC"
-  )
+    # Plot MD (log ratios vs mean difference) using limma package
+    pdf(md_pdf[i])
+    limma::plotMD(res,
+        status = status,
+        main = paste("MD Plot:", unmake_names(contrast_data[i])),
+        hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
+        xlab = "Average Expression", ylab = "logFC"
+    )
 
-  abline(h = 0, col = "grey", lty = 2)
+    abline(h = 0, col = "grey", lty = 2)
 
-  link_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".pdf")
-  link_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf")
-  link_data <- rbind(link_data, c(link_name, link_addr))
-  invisible(dev.off())
+    link_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".pdf")
+    link_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf")
+    link_data <- rbind(link_data, c(link_name, link_addr))
+    invisible(dev.off())
 
-  png(md_png[i], height = 600, width = 600)
-  limma::plotMD(res,
-    status = status,
-    main = paste("MD Plot:", unmake_names(contrast_data[i])),
-    hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
-    xlab = "Average Expression", ylab = "logFC"
-  )
+    png(md_png[i], height = 600, width = 600)
+    limma::plotMD(res,
+        status = status,
+        main = paste("MD Plot:", unmake_names(contrast_data[i])),
+        hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
+        xlab = "Average Expression", ylab = "logFC"
+    )
 
-  abline(h = 0, col = "grey", lty = 2)
+    abline(h = 0, col = "grey", lty = 2)
 
-  img_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".png")
-  img_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png")
-  image_data <- rbind(image_data, c(img_name, img_addr))
-  invisible(dev.off())
+    img_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".png")
+    img_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png")
+    image_data <- rbind(image_data, c(img_name, img_addr))
+    invisible(dev.off())
 }
 sig_diff <- data.frame(Up = up_count, Flat = flat_count, Down = down_count)
 row.names(sig_diff) <- contrast_data
 
 # Save relevant items as rda object
 if (want_rda) {
-  if (want_norm) {
-    save(counts, data, status, normalised_counts, labels, factors, fit, res, top, contrasts, design,
-      file = rda_out, ascii = TRUE
-    )
-  } else {
-    save(counts, data, status, labels, factors, fit, res, top, contrasts, design,
-      file = rda_out, ascii = TRUE
-    )
-  }
-  link_data <- rbind(link_data, c("edgeR_analysis.RData", "edgeR_analysis.RData"))
+    if (want_norm) {
+        save(counts, data, status, normalised_counts, labels, factors, fit, res, top, contrasts, design,
+            file = rda_out, ascii = TRUE
+        )
+    } else {
+        save(counts, data, status, labels, factors, fit, res, top, contrasts, design,
+            file = rda_out, ascii = TRUE
+        )
+    }
+    link_data <- rbind(link_data, c("edgeR_analysis.RData", "edgeR_analysis.RData"))
 }
 
 # Record session info
@@ -632,7 +633,7 @@
 html_image(image_data$Link[1], image_data$Label[1])
 
 for (i in 2:nrow(image_data)) {
-  html_image(image_data$Link[i], image_data$Label[i])
+    html_image(image_data$Link[i], image_data$Label[i])
 }
 
 cata("<h4>Differential Expression Counts:</h4>\n")
@@ -641,40 +642,40 @@
 cata("<tr>\n")
 table_item()
 for (i in colnames(sig_diff)) {
-  table_head_item(i)
+    table_head_item(i)
 }
 cata("</tr>\n")
 for (i in seq_len(nrow(sig_diff))) {
-  cata("<tr>\n")
-  table_head_item(unmake_names(row.names(sig_diff)[i]))
-  for (j in seq_len(ncol(sig_diff))) {
-    table_item(as.character(sig_diff[i, j]))
-  }
-  cata("</tr>\n")
+    cata("<tr>\n")
+    table_head_item(unmake_names(row.names(sig_diff)[i]))
+    for (j in seq_len(ncol(sig_diff))) {
+        table_item(as.character(sig_diff[i, j]))
+    }
+    cata("</tr>\n")
 }
 cata("</table>")
 
 cata("<h4>Plots:</h4>\n")
 for (i in seq_len(nrow(link_data))) {
-  if (grepl(".pdf", link_data$Link[i])) {
-    html_link(link_data$Link[i], link_data$Label[i])
-  }
+    if (grepl(".pdf", link_data$Link[i])) {
+        html_link(link_data$Link[i], link_data$Label[i])
+    }
 }
 
 cata("<h4>Tables:</h4>\n")
 for (i in seq_len(nrow(link_data))) {
-  if (grepl(".tsv", link_data$Link[i])) {
-    html_link(link_data$Link[i], link_data$Label[i])
-  }
+    if (grepl(".tsv", link_data$Link[i])) {
+        html_link(link_data$Link[i], link_data$Label[i])
+    }
 }
 
 if (want_rda) {
-  cata("<h4>R Data Objects:</h4>\n")
-  for (i in seq_len(nrow(link_data))) {
-    if (grepl(".RData", link_data$Link[i])) {
-      html_link(link_data$Link[i], link_data$Label[i])
+    cata("<h4>R Data Objects:</h4>\n")
+    for (i in seq_len(nrow(link_data))) {
+        if (grepl(".RData", link_data$Link[i])) {
+            html_link(link_data$Link[i], link_data$Label[i])
+        }
     }
-  }
 }
 
 cata("<p>Alt-click links to download file.</p>\n")
@@ -686,68 +687,68 @@
 cata("<ul>\n")
 
 if (filt_cpm || filt_smpcount || filt_totcount) {
-  if (filt_cpm) {
-    temp_str <- paste(
-      "Genes without more than", opt$cpmReq,
-      "CPM in at least", opt$sampleReq, "samples are insignificant",
-      "and filtered out."
-    )
-  } else if (filt_smpcount) {
-    temp_str <- paste(
-      "Genes without more than", opt$cntReq,
-      "counts in at least", opt$sampleReq, "samples are insignificant",
-      "and filtered out."
+    if (filt_cpm) {
+        temp_str <- paste(
+            "Genes without more than", opt$cpmReq,
+            "CPM in at least", opt$sampleReq, "samples are insignificant",
+            "and filtered out."
+        )
+    } else if (filt_smpcount) {
+        temp_str <- paste(
+            "Genes without more than", opt$cntReq,
+            "counts in at least", opt$sampleReq, "samples are insignificant",
+            "and filtered out."
+        )
+    } else if (filt_totcount) {
+        temp_str <- paste(
+            "Genes without more than", opt$cntReq,
+            "counts, after summing counts for all samples, are insignificant",
+            "and filtered out."
+        )
+    }
+
+    list_item(temp_str)
+    filter_prop <- round(filtered_count / prefilter_count * 100, digits = 2)
+    temp_str <- paste0(
+        filtered_count, " of ", prefilter_count, " (", filter_prop,
+        "%) genes were filtered out for low expression."
     )
-  } else if (filt_totcount) {
-    temp_str <- paste(
-      "Genes without more than", opt$cntReq,
-      "counts, after summing counts for all samples, are insignificant",
-      "and filtered out."
-    )
-  }
-
-  list_item(temp_str)
-  filter_prop <- round(filtered_count / prefilter_count * 100, digits = 2)
-  temp_str <- paste0(
-    filtered_count, " of ", prefilter_count, " (", filter_prop,
-    "%) genes were filtered out for low expression."
-  )
-  list_item(temp_str)
+    list_item(temp_str)
 }
 list_item(opt$normOpt, " was the method used to normalise library sizes.")
 if (want_lrt) {
-  list_item("The edgeR likelihood ratio test was used.")
+    list_item("The edgeR likelihood ratio test was used.")
 } else {
-  if (want_robust) {
-    list_item("The edgeR quasi-likelihood test was used with robust settings (robust=TRUE with estimateDisp and glmQLFit).")
-  } else {
-    list_item("The edgeR quasi-likelihood test was used.")
-  }
+    if (want_robust) {
+        list_item("The edgeR quasi-likelihood test was used with robust settings (robust=TRUE with estimateDisp and glmQLFit).")
+    } else {
+        list_item("The edgeR quasi-likelihood test was used.")
+    }
 }
 if (opt$pAdjOpt != "none") {
-  if (opt$pAdjOpt == "BH" || opt$pAdjOpt == "BY") {
+    if (opt$pAdjOpt == "BH" || opt$pAdjOpt == "BY") {
+        temp_str <- paste0(
+            "MD-Plot highlighted genes are significant at FDR ",
+            "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
+            "least ", opt$lfcReq, "."
+        )
+        list_item(temp_str)
+    } else if (opt$pAdjOpt == "holm") {
+        temp_str <- paste0(
+            "MD-Plot highlighted genes are significant at adjusted ",
+            "p-value of ", opt$pValReq, "  by the Holm(1979) ",
+            "method, and exhibit log2-fold-change of at least ",
+            opt$lfcReq, "."
+        )
+        list_item(temp_str)
+    }
+} else {
     temp_str <- paste0(
-      "MD-Plot highlighted genes are significant at FDR ",
-      "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
-      "least ", opt$lfcReq, "."
+        "MD-Plot highlighted genes are significant at p-value ",
+        "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
+        "least ", opt$lfcReq, "."
     )
     list_item(temp_str)
-  } else if (opt$pAdjOpt == "holm") {
-    temp_str <- paste0(
-      "MD-Plot highlighted genes are significant at adjusted ",
-      "p-value of ", opt$pValReq, "  by the Holm(1979) ",
-      "method, and exhibit log2-fold-change of at least ",
-      opt$lfcReq, "."
-    )
-    list_item(temp_str)
-  }
-} else {
-  temp_str <- paste0(
-    "MD-Plot highlighted genes are significant at p-value ",
-    "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
-    "least ", opt$lfcReq, "."
-  )
-  list_item(temp_str)
 }
 cata("</ul>\n")
 
@@ -761,26 +762,26 @@
 table_head_item(names(factors)[1], " (Primary Factor)")
 
 if (ncol(factors) > 1) {
-  for (i in names(factors)[2:length(names(factors))]) {
-    table_head_item(i)
-  }
-  cata("</tr>\n")
+    for (i in names(factors)[2:length(names(factors))]) {
+        table_head_item(i)
+    }
+    cata("</tr>\n")
 }
 
 for (i in seq_len(nrow((factors)))) {
-  cata("<tr>\n")
-  table_head_item(row.names(factors)[i])
-  for (j in seq_len(ncol(factors))) {
-    table_item(as.character(unmake_names(factors[i, j])))
-  }
-  cata("</tr>\n")
+    cata("<tr>\n")
+    table_head_item(row.names(factors)[i])
+    for (j in seq_len(ncol(factors))) {
+        table_item(as.character(unmake_names(factors[i, j])))
+    }
+    cata("</tr>\n")
 }
 cata("</table>")
 
 for (i in seq_len(nrow(link_data))) {
-  if (grepl("session_info", link_data$Link[i])) {
-    html_link(link_data$Link[i], link_data$Label[i])
-  }
+    if (grepl("session_info", link_data$Link[i])) {
+        html_link(link_data$Link[i], link_data$Label[i])
+    }
 }
 
 cata("<table border=\"0\">\n")
--- a/edger.xml	Wed Nov 22 03:57:37 2023 +0000
+++ b/edger.xml	Wed Sep 04 15:50:01 2024 +0000
@@ -4,7 +4,7 @@
     </description>
     <macros>
         <token name="@TOOL_VERSION@">3.36.0</token>
-        <token name="@VERSION_SUFFIX@">4</token>
+        <token name="@VERSION_SUFFIX@">5</token>
     </macros>
     <edam_topics>
         <edam_topic>topic_3308</edam_topic>
@@ -130,11 +130,8 @@
             <when value="files">
                 <repeat name="rep_factor" title="Factor" min="1">
                     <param name="factorName" type="text" label="Name" help="Name of experiment factor of interest (e.g. Genotype). One factor must be entered and there must be two or more groups per factor. Optional additional factors (e.g. Batch) can be entered using the Insert Factor button below, see Help section for more information. NOTE: Please only use letters, numbers or underscores, and the first character of each factor must be a letter">
-                        <sanitizer>
-                            <valid initial="string.letters,string.digits">
-                                <add value="_"/>
-                            </valid>
-                        </sanitizer>
+                        <validator type="empty_field"/>
+                        <validator type="regex" message="Please only use letters, numbers or underscores">^[\w]+$</validator>
                     </param>
                     <repeat name="rep_group" title="Group" min="2" default="2">
                         <param name="groupName" type="text" label="Name" help="Name of group that the counts files belong to (e.g. WT or Mut). NOTE: Please only use letters, numbers or underscores (case sensitive), and the first character of each group must be a letter">