changeset 2:ba2111ae6eb4 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/egsea commit ee9b940a5ffa6c52e42a89e55c0f5bdda0a386d5
author iuc
date Mon, 28 Jan 2019 18:39:06 -0500
parents 73281fbdf6c1
children 31ea4992b948
files egsea.R egsea.xml test-data/il13.group test-data/out_rscript.txt test-data/ranked-h-gene-sets-IL13-IL13Ant.txt test-data/ranked-h-gene-sets-IL13Ant-IL13.txt test-data/ranked-h-gene-sets-IL13Ant-IL13_batch.txt test-data/ranked-h-gene-sets-IL13Ant-IL13_batch_all.txt test-data/ranked-h-gene-sets-compare.txt test-data/ranked-kegg-gene-sets-IL13Ant-IL13.txt
diffstat 10 files changed, 121 insertions(+), 270 deletions(-) [+]
line wrap: on
line diff
--- a/egsea.R	Thu Feb 15 02:34:59 2018 -0500
+++ b/egsea.R	Mon Jan 28 18:39:06 2019 -0500
@@ -98,14 +98,19 @@
 
 } else {
  # Process the single count matrix
-    counts <- read.table(args$matrixPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
+    counts <- read.table(args$matrixPath, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names=FALSE)
     row.names(counts) <- counts[, 1]
     counts <- counts[ , -1]
     countsRows <- nrow(counts)
 
     # Process factors
     if (is.null(args$factInput)) {
-            factorData <- read.table(args$factFile, header=TRUE, sep="\t")
+            factorData <- read.table(args$factFile, header=TRUE, sep="\t", strip.white=TRUE)
+            # every sample (column) of the count matrix needs a row in the factors file, otherwise match() below would yield NA rows
+            if(!all(colnames(counts) %in% factorData[, 1]))
+                stop("Sample IDs in factors file and count matrix don't match")
+            # reorder the factor rows to follow the column order of the counts matrix
+            factorData <- factorData[match(colnames(counts), factorData[, 1]), ]
             factors <- factorData[, -1, drop=FALSE]
     }  else {
             factors <- unlist(strsplit(args$factInput, "|", fixed=TRUE))
--- a/egsea.xml	Thu Feb 15 02:34:59 2018 -0500
+++ b/egsea.xml	Mon Jan 28 18:39:06 2019 -0500
@@ -1,9 +1,9 @@
-<tool id="egsea" name="EGSEA" version="1.6.0.1">
+<tool id="egsea" name="EGSEA" version="1.8.0">
     <description> easy and efficient ensemble gene set testing</description>
     <requirements>
-        <requirement type="package" version="1.6.0">bioconductor-egsea</requirement>
-        <requirement type="package" version="1.4.4">r-optparse</requirement>
-        <requirement type="package" version="0.2.15">r-rjson</requirement>
+        <requirement type="package" version="1.8.0">bioconductor-egsea</requirement>
+        <requirement type="package" version="1.6.0">r-optparse</requirement>
+        <requirement type="package" version="0.2.20">r-rjson</requirement>
         <!--statmod is required for fry-->
         <requirement type="package" version="1.4.30">r-statmod</requirement>
     </requirements>
@@ -264,7 +264,7 @@
 
     <tests>
         <!-- Ensure report is output -->
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="2">
             <param name="non_commercial_use" value="True"/>
             <param name="format" value="matrix" />
             <param name="counts" value="il13.counts"/>
@@ -277,7 +277,12 @@
                 <param name="contrast" value="IL13Ant-IL13"/>
             </repeat>
             <output_collection name="outTables" count="1">
-                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13.txt"/>
+                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*BroadUrl.*Description.*PubMedID.*NumGenes.*Contributor.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*M5890.*HALLMARK_TNFA_SIGNALING_VIA_NFKB.*181/200.*3.6" />
+                    </assert_contents>
+                </element>
             </output_collection>
             <output name="outReport">
                 <assert_contents>
@@ -286,7 +291,7 @@
             </output>
         </test>
         <!-- Ensure factors file input works and Rscript is output-->
-        <test expect_num_outputs="2">
+        <test expect_num_outputs="3">
             <param name="non_commercial_use" value="True"/>
             <param name="format" value="matrix"/>
             <param name="counts" value="il13.counts"/>
@@ -303,12 +308,21 @@
                 </assert_contents>
             </output>
             <output_collection name="outTables" count="1">
-                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13.txt"/>
+                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*BroadUrl.*Description.*PubMedID.*NumGenes.*Contributor.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*M5890.*HALLMARK_TNFA_SIGNALING_VIA_NFKB.*181/200.*3.6" />
+                    </assert_contents>
+                </element>
             </output_collection>
-            <output name="outRscript" value="out_rscript.txt"/>
+            <output name="outRscript">
+                <assert_contents>
+                    <has_text_matching expression="save.image" />
+                </assert_contents>
+            </output>
         </test>
         <!-- Ensure two contrasts works -->
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="2">
             <param name="non_commercial_use" value="True"/>
             <param name="format" value="matrix"/>
             <param name="counts" value="il13.counts"/>
@@ -322,13 +336,22 @@
                 <param name="contrast" value="IL13-IL13Ant"/>
             </repeat>
             <output_collection name="outTables" count="3">
-                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13.txt"/>
-                <element name="ranked-h-gene-sets-IL13-IL13Ant" ftype="tabular" file="ranked-h-gene-sets-IL13-IL13Ant.txt"/>
-                <element name="ranked-h-gene-sets-IL13-IL13Ant" ftype="tabular" file="ranked-h-gene-sets-compare.txt"/>
+                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*BroadUrl.*Description.*PubMedID.*NumGenes.*Contributor.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*M5890.*HALLMARK_TNFA_SIGNALING_VIA_NFKB.*181/200.*3.6" />
+                    </assert_contents>
+                </element>
+                <element name="ranked-h-gene-sets-IL13-IL13Ant" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*BroadUrl.*Description.*PubMedID.*NumGenes.*Contributor.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*M5890.*HALLMARK_TNFA_SIGNALING_VIA_NFKB.*181/200.*3.6" />
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
         <!-- Ensure two factors works -->
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="2">
             <param name="non_commercial_use" value="True"/>
             <param name="format" value="matrix"/>
             <param name="counts" value="il13.counts"/>
@@ -339,11 +362,16 @@
                 <param name="contrast" value="IL13Ant-IL13"/>
             </repeat>
             <output_collection name="outTables" count="1">
-                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13_batch.txt"/>
+                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*BroadUrl.*Description.*PubMedID.*NumGenes.*Contributor.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*M5928.*HALLMARK_MYC_TARGETS_V2.*53/58.*6.7" />
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
         <!-- Ensure all gene set methods work -->
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="2">
             <param name="non_commercial_use" value="True"/>
             <param name="format" value="matrix"/>
             <param name="counts" value="il13.counts"/>
@@ -355,11 +383,16 @@
             </repeat>
             <param name="base_methods" value="camera,safe,gage,zscore,gsva,globaltest,ora,ssgsea,padog,plage,fry,roast"/>
             <output_collection name="outTables" count="1">
-                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13_batch_all.txt"/>
+                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*BroadUrl.*Description.*PubMedID.*NumGenes.*Contributor.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*M5928.*HALLMARK_MYC_TARGETS_V2.*53/58.*2.6" />
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
         <!-- Ensure KEGG updated works -->
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="2">
             <param name="non_commercial_use" value="True"/>
             <param name="format" value="matrix"/>
             <param name="counts" value="il13.counts"/>
@@ -372,8 +405,51 @@
             <param name="keggdb_gsets" value="keggmet"/>
             <param name="kegg_updated" value="True"/>
             <output_collection name="outTables" count="2">
-                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13_batch.txt"/>
-                <element name="ranked-kegg-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-kegg-gene-sets-IL13Ant-IL13.txt"/>
+                <element name="ranked-kegg-gene-sets-IL13Ant-IL13" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*NumGenes.*Type.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*hsa00290.*Valine, leucine and isoleucine biosynthesis.*4/4.*Metabolism.*5.1" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Ensure individual counts files work -->
+        <test expect_num_outputs="2">
+            <param name="non_commercial_use" value="True"/>
+            <param name="format" value="files" />
+            <repeat name="rep_factor">
+                <param name="factorName" value="Treatment"/>
+                <repeat name="rep_group">
+                    <param name="groupName" value="IL13"/>
+                    <param name="countsFile" value="IL13-1.counts,IL13-2.counts,IL13-3.counts"/>
+                </repeat>
+                <repeat name="rep_group">
+                    <param name="groupName" value="IL13Ant"/>
+                    <param name="countsFile" value="IL13Ant-1.counts,IL13Ant-2.counts"/>
+                </repeat>
+            </repeat>
+            <repeat name="rep_factor">
+                <param name="factorName" value="Batch"/>
+                <repeat name="rep_group">
+                    <param name="groupName" value="b1"/>
+                    <param name="countsFile" value="IL13-1.counts,IL13Ant-1.counts"/>
+                </repeat>
+                <repeat name="rep_group">
+                    <param name="groupName" value="b2"/>
+                    <param name="countsFile" value="IL13-2.counts,IL13-3.counts,IL13Ant-2.counts"/>
+                </repeat>
+            </repeat>
+            <param name="genes" value="il13.genes"/>
+            <repeat name="rep_contrast">
+                <param name="contrast" value="IL13Ant-IL13"/>
+            </repeat>
+            <output_collection name="outTables" count="1">
+                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*BroadUrl.*Description.*PubMedID.*NumGenes.*Contributor.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*M5928.*HALLMARK_MYC_TARGETS_V2.*53/58.*6.7" />
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
     </tests>
@@ -409,11 +485,23 @@
 
 **Inputs**
 
-**Counts Matrix**
+**Counts Data**
+
+This tool requires a counts matrix (counts table) containing the raw RNA-seq read counts. The counts data can be input either as separate counts files (one sample per file) or as a single counts matrix (one sample per column). The rows correspond to genes, and the columns correspond to the counts for the samples. Values must be tab-separated, with the first row containing the sample/column labels. The first column must contain Entrez Gene IDs that are unique (not repeated) within the counts file. Entrez IDs can be obtained from the **annotateMyIDs** Galaxy tool. Genes with low counts should be removed beforehand; for example, the **limma** tool can output a filtered counts matrix.
+
+Example - **Separate Count Files**:
 
-This tool requires a counts matrix (counts table) containing the raw RNA-seq read counts. The first column must contain Entrez Gene IDs that are unique (not repeated) within the counts file. Entrez IDs can be obtained from the **annotateMyIDs** Galaxy tool.
+    =============== ==========
+    EntrezID        **WT1**
+    =============== ==========
+    1               71
+    1000            3
+    10000           2310
+    100009605       3
+    100009613       9
+    =============== ==========
 
-Example:
+Example - **Single Count Matrix**:
 
     =============== ========== ========== ========== ========= ========= =========
     EntrezID        **WT1**    **WT2**    **WT3**    **Mut1**  **Mut2**  **Mut3**
@@ -427,7 +515,7 @@
 
 **Factor Information**
 
-Enter factor names and groups in the tool form, or provide a tab-separated file that has the samples in the same order as listed in the columns of the counts matrix. The second column should contain the primary factor levels (e.g. WT, Mut) with optional additional columns for any secondary factors e.g Batch.
+Enter factor names and groups in the tool form, or provide a tab-separated file with one header row and the sample names in the first column. The sample names must be the same as the column names of the counts matrix; the rows may be in any order, as they are matched to the counts matrix by sample name. The second column should contain the primary factor levels (e.g. WT, Mut), with optional additional columns for any secondary factors, e.g. Batch.
 
 Example:
 
@@ -444,7 +532,7 @@
 
 *Factor Name:* The name of the experimental factor being investigated e.g. Genotype, Treatment. One factor must be entered and spaces must not be used. Optionally, additional factors can be included, these are variables that might influence your experiment e.g. Batch, Gender, Subject. If additional factors are entered, edgeR will fit an additive linear model.
 
-*Groups:* The names of the groups for the factor. These must be entered in the same order as the samples (to which the groups correspond) are listed in the columns of the counts matrix. Spaces must not be used and if entered into the tool form above, the values should be separated by commas.
+*Groups:* The names of the groups for the factor. Spaces must not be used and if entered into the tool form above, the values should be separated by commas.
 
 **Symbols Mapping file**
 
--- a/test-data/il13.group	Thu Feb 15 02:34:59 2018 -0500
+++ b/test-data/il13.group	Mon Jan 28 18:39:06 2019 -0500
@@ -1,6 +1,6 @@
 SampleID	Treatment
-IL13-1	IL13
 IL13Ant-1	IL13Ant
 IL13-2	IL13
 IL13-3	IL13
 IL13Ant-2	IL13Ant
+IL13-1	IL13
--- a/test-data/out_rscript.txt	Thu Feb 15 02:34:59 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,206 +0,0 @@
-# Code based on (and inspired by) the Galaxy limma-voom/edgeR/DESeq2 wrappers
-
-options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
-
-# we need that to not crash galaxy with an UTF8 error on German LC settings.
-loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
-
-suppressPackageStartupMessages({
-    library(EGSEA)
-    library(limma)
-    library(edgeR)
-    library(optparse)
-})
-
-
-## Function Declaration
-
-sanitiseEquation <- function(equation) {
-    equation <- gsub(" *[+] *", "+", equation)
-    equation <- gsub(" *[-] *", "-", equation)
-    equation <- gsub(" *[/] *", "/", equation)
-    equation <- gsub(" *[*] *", "*", equation)
-    equation <- gsub("^\\s+|\\s+$", "", equation)
-    return(equation)
-}
-
-# Function to sanitise group information
-sanitiseGroups <- function(string) {
-    string <- gsub(" *[,] *", ",", string)
-    string <- gsub("^\\s+|\\s+$", "", string)
-    return(string)
-}
-
-# Generating design information
-pasteListName <- function(string) {
-    return(paste0("factors$", string))
-}
-
-## Input Processing
-
-option_list <- list(
-    make_option(c("-threads", "--threads"), default=2, type="integer", help="Number of threads for egsea"),
-    make_option(c("-filesPath", "--filesPath"), type="character", help="JSON list object if multiple files input"),
-    make_option(c("-matrixPath", "--matrixPath"), type="character", help="Path to count matrix"),
-    make_option(c("-factFile", "--factFile"), type="character", help="Path to factor information file"),
-    make_option(c("-factInput", "--factInput"), type="character", help="String containing factors if manually input"),
-    make_option(c("-contrastData", "--contrastData"), type="character", help="Contrasts of Interest (Groups to compare)"),
-    make_option(c("-genes", "--genes"), type="character", help="Path to genes file"),
-    make_option(c("-species", "--species"), type="character"),
-    make_option(c("-base_methods", "--base_methods"), type="character", help="Gene set testing methods"),
-    make_option(c("-msigdb", "--msigdb"), type="character", help="MSigDB Gene Set Collections"),
-    make_option(c("-keggdb", "--keggdb"), type="character", help="KEGG Pathways"),
-    make_option(c("-keggupdated", "--keggupdated"), type="logical", help="Use updated KEGG"),
-    make_option(c("-gsdb", "--gsdb"), type="character", help = "GeneSetDB Gene Sets"),
-    make_option(c("-display_top", "--display_top"), type="integer", help = "Number of top Gene Sets to display"),
-    make_option(c("-min_size", "--min_size"), type="integer", help = "Minimum Size of Gene Set"),
-    make_option(c("-fdr_cutoff", "--fdr_cutoff"), type="double", help = "FDR cutoff"),
-    make_option(c("-combine_method", "--combine_method"), type="character", help="Method to use to combine the p-values"),
-    make_option(c("-sort_method", "--sort_method"), type="character", help="Method to sort the results"),
-    make_option(c("-rdaOpt", "--rdaOpt"), type="character", help="Output RData file")
-    )
-
-parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
-args = parse_args(parser)
-
-
-## Read in Files
-
-if (!is.null(args$filesPath)) {
-    # Process the separate count files (adapted from DESeq2 wrapper)
-    library("rjson")
-    parser <- newJSONParser()
-    parser$addData(args$filesPath)
-    factorList <- parser$getObject()
-    factors <- sapply(factorList, function(x) x[[1]])
-    filenamesIn <- unname(unlist(factorList[[1]][[2]]))
-    sampleTable <- data.frame(sample=basename(filenamesIn),
-                            filename=filenamesIn,
-                            row.names=filenamesIn,
-                            stringsAsFactors=FALSE)
-    for (factor in factorList) {
-        factorName <- factor[[1]]
-        sampleTable[[factorName]] <- character(nrow(sampleTable))
-        lvls <- sapply(factor[[2]], function(x) names(x))
-        for (i in seq_along(factor[[2]])) {
-            files <- factor[[2]][[i]][[1]]
-            sampleTable[files,factorName] <- lvls[i]
-        }
-        sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls)
-    }
-    rownames(sampleTable) <- sampleTable$sample
-    rem <- c("sample","filename")
-    factors <- sampleTable[, !(names(sampleTable) %in% rem), drop=FALSE]
-
-    #read in count files and create single table
-    countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)})
-    counts <- do.call("cbind", countfiles)
-
-} else {
- # Process the single count matrix
-    counts <- read.table(args$matrixPath, header=TRUE, sep="\t", stringsAsFactors=FALSE)
-    row.names(counts) <- counts[, 1]
-    counts <- counts[ , -1]
-    countsRows <- nrow(counts)
-
-    # Process factors
-    if (is.null(args$factInput)) {
-            factorData <- read.table(args$factFile, header=TRUE, sep="\t")
-            factors <- factorData[, -1, drop=FALSE]
-    }  else {
-            factors <- unlist(strsplit(args$factInput, "|", fixed=TRUE))
-            factorData <- list()
-            for (fact in factors) {
-                newFact <- unlist(strsplit(fact, split="::"))
-                factorData <- rbind(factorData, newFact)
-            } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor.
-
-            # Set the row names to be the name of the factor and delete first row
-            row.names(factorData) <- factorData[, 1]
-            factorData <- factorData[, -1]
-            factorData <- sapply(factorData, sanitiseGroups)
-            factorData <- sapply(factorData, strsplit, split=",")
-            factorData <- sapply(factorData, make.names)
-            # Transform factor data into data frame of R factor objects
-            factors <- data.frame(factorData)
-    }
-}
-
-# Create a DGEList object
-counts <- DGEList(counts)
-
-# Set group to be the Primary Factor input
-group <- factors[, 1, drop=FALSE]
-
-# Split up contrasts separated by comma into a vector then sanitise
-contrastData <- unlist(strsplit(args$contrastData, split=","))
-contrastData <- sanitiseEquation(contrastData)
-contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)
-
-# Creating design
-row.names(factors) <- colnames(counts)
-factorList <- sapply(names(factors), pasteListName)
-
-formula <- "~0"
-for (i in 1:length(factorList)) {
-    formula <- paste(formula, factorList[i], sep="+")
-}
-formula <- formula(formula)
-
-design <- model.matrix(formula)
-
-for (i in 1:length(factorList)) {
-    colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE)
-}
-
-## Generate Contrasts information
-contrasts <- makeContrasts(contrasts=contrastData, levels=design)
-
-
-## Add Gene Symbol information
-
-genes <- read.table(args$genes, sep='\t', header=TRUE)
-
-
-## Set Gene Set Testing Methods
-
-base_methods <- unlist(strsplit(args$base_methods, ","))
-
-
-## Set Gene Sets
-
-if (args$msigdb != "None") {
-    msigdb <- unlist(strsplit(args$msigdb, ","))
-} else {
-    msigdb <- "none"
-}
-
-if (args$keggdb != "None") {
-    keggdb <- unlist(strsplit(args$keggdb, ","))
-    kegg_all <- c("Metabolism"="keggmet", "Signaling"="keggsig", "Disease"="keggdis")
-    kegg_exclude <- names(kegg_all[!(kegg_all %in% keggdb)])
-} else {
-    kegg_exclude <- "all"
-}
-
-if (args$gsdb != "None") {
-    gsdb <- unlist(strsplit(args$gsdb, ","))
-} else {
-    gsdb <- "none"
-}
-
-## Index gene sets
-
-gs.annots <- buildIdx(entrezIDs=rownames(counts), species=args$species, msigdb.gsets=msigdb, gsdb.gsets=gsdb, kegg.exclude=kegg_exclude, kegg.updated=args$keggupdated)
-
-
-## Run egsea.cnt
-
-gsa <- egsea.cnt(counts=counts, group=group, design=design, contrasts=contrasts, gs.annots=gs.annots, symbolsMap=genes, baseGSEAs=base_methods, minSize=args$min_size, display.top=args$display_top, combineMethod=args$combine_method, sort.by=args$sort_method, report.dir='./report_dir', fdr.cutoff=args$fdr_cutoff, num.threads=args$threads, report=TRUE)
-
-
-## Output RData file
-
-if (!is.null(args$rdaOpt)) {
-  save.image(file = "EGSEA_analysis.RData")
-}
\ No newline at end of file
--- a/test-data/ranked-h-gene-sets-IL13-IL13Ant.txt	Thu Feb 15 02:34:59 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-Rank	ID	GeneSet	BroadUrl	Description	PubMedID	NumGenes	Contributor	p.value	p.adj	vote.rank	avg.rank	med.rank	min.pvalue	min.rank	avg.logfc	avg.logfc.dir	direction	significance	camera	globaltest	ora
-1	M5890	HALLMARK_TNFA_SIGNALING_VIA_NFKB	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_TNFA_SIGNALING_VIA_NFKB.html	Genes regulated by NF-kB in response to TNF [GeneID=7124].		181/200	Arthur Liberzon	3.69843944783645e-09	6.16406574639408e-08	5	10	3	1.23281315079864e-09	1	0.588637134386457	-0.680899745245093	-1	20.6439242624481	3	26	1
-2	M5895	HALLMARK_WNT_BETA_CATENIN_SIGNALING	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_WNT_BETA_CATENIN_SIGNALING.html	Genes up-regulated by activation of WNT signaling through accumulation of beta catenin CTNNB1 [GeneID=1499].		35/42	Arthur Liberzon	0.0440661300653943	0.169485115636132	5	6.66666666666667	5	0.014909910604653	2	0.412052814817513	0.432421220391424	1	1.53878145448751	13	2	5
-3	M5928	HALLMARK_MYC_TARGETS_V2	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_MYC_TARGETS_V2.html	A subgroup of genes regulated by MYC - version 2 (v2).		53/58	Arthur Liberzon	4.79647602359615e-07	4.79647602359616e-06	5	12.3333333333333	5	1.59882559682303e-07	3	0.342550383131631	0.340902674013207	1	8.85877098589516	5	3	29
-4	M5903	HALLMARK_NOTCH_SIGNALING	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_NOTCH_SIGNALING.html	Genes up-regulated by activation of Notch signaling.		28/32	Arthur Liberzon	0.271	0.586487804394038	40	16.6666666666667	11	0.1	1	0.623573196381421	0.703516699577552	1	0.69638194408607	38	1	11
-5	M5932	HALLMARK_INFLAMMATORY_RESPONSE	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INFLAMMATORY_RESPONSE.html	Genes defining inflammatory response.		175/200	Arthur Liberzon	2.56397784509287e-08	3.20497230636609e-07	5	15.6666666666667	12	8.54659289002046e-09	4	0.724979825102502	-0.798585365579773	-1	22.9015712966849	4	12	31
--- a/test-data/ranked-h-gene-sets-IL13Ant-IL13.txt	Thu Feb 15 02:34:59 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-Rank	ID	GeneSet	BroadUrl	Description	PubMedID	NumGenes	Contributor	p.value	p.adj	vote.rank	avg.rank	med.rank	min.pvalue	min.rank	avg.logfc	avg.logfc.dir	direction	significance	camera	globaltest	ora
-1	M5890	HALLMARK_TNFA_SIGNALING_VIA_NFKB	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_TNFA_SIGNALING_VIA_NFKB.html	Genes regulated by NF-kB in response to TNF [GeneID=7124].		181/200	Arthur Liberzon	3.69843944783645e-09	6.16406574639408e-08	5	10	3	1.23281315079864e-09	1	0.588637134386457	0.680899745245093	1	20.6439242624481	3	26	1
-2	M5895	HALLMARK_WNT_BETA_CATENIN_SIGNALING	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_WNT_BETA_CATENIN_SIGNALING.html	Genes up-regulated by activation of WNT signaling through accumulation of beta catenin CTNNB1 [GeneID=1499].		35/42	Arthur Liberzon	0.0440661300653943	0.169485115636132	5	6.66666666666667	5	0.014909910604653	2	0.412052814817513	-0.432421220391424	-1	1.53878145448751	13	2	5
-3	M5928	HALLMARK_MYC_TARGETS_V2	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_MYC_TARGETS_V2.html	A subgroup of genes regulated by MYC - version 2 (v2).		53/58	Arthur Liberzon	4.79647602359615e-07	4.79647602359616e-06	5	12.3333333333333	5	1.59882559682303e-07	3	0.342550383131631	-0.340902674013207	-1	8.85877098589516	5	3	29
-4	M5903	HALLMARK_NOTCH_SIGNALING	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_NOTCH_SIGNALING.html	Genes up-regulated by activation of Notch signaling.		28/32	Arthur Liberzon	0.271	0.586487804394038	40	16.6666666666667	11	0.1	1	0.623573196381421	-0.703516699577552	-1	0.69638194408607	38	1	11
-5	M5932	HALLMARK_INFLAMMATORY_RESPONSE	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INFLAMMATORY_RESPONSE.html	Genes defining inflammatory response.		175/200	Arthur Liberzon	2.56397784509287e-08	3.20497230636609e-07	5	15.6666666666667	12	8.54659289002046e-09	4	0.724979825102502	0.798585365579773	1	22.9015712966849	4	12	31
--- a/test-data/ranked-h-gene-sets-IL13Ant-IL13_batch.txt	Thu Feb 15 02:34:59 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-Rank	ID	GeneSet	BroadUrl	Description	PubMedID	NumGenes	Contributor	p.value	p.adj	vote.rank	avg.rank	med.rank	min.pvalue	min.rank	avg.logfc	avg.logfc.dir	direction	significance	camera	globaltest	ora
-1	M5928	HALLMARK_MYC_TARGETS_V2	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_MYC_TARGETS_V2.html	A subgroup of genes regulated by MYC - version 2 (v2).		53/58	Arthur Liberzon	6.74284874774357e-08	1.12380812462393e-06	5	14.6666666666667	3	2.24761629976564e-08	3	0.359329542834304	-0.36180948485806	-1	5.6036400577191	3	3	38
-2	M5932	HALLMARK_INFLAMMATORY_RESPONSE	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INFLAMMATORY_RESPONSE.html	Genes defining inflammatory response.		175/200	Arthur Liberzon	1.06165133122067e-07	1.32706416402584e-06	5	6	4	3.5388378959693e-08	2	2.30703143698757	2.37290331728617	1	35.6474882896303	4	12	2
-3	M5944	HALLMARK_ANGIOGENESIS	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_ANGIOGENESIS.html	Genes up-regulated during formation of blood vessels (angiogenesis).		24/36	Arthur Liberzon	0.0987135364567957	0.224348946492718	30	13.3333333333333	8	0.034050809631757	4	3.55096745337827	3.55096745337827	1	6.04317533929437	28	4	8
-4	M5895	HALLMARK_WNT_BETA_CATENIN_SIGNALING	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_WNT_BETA_CATENIN_SIGNALING.html	Genes up-regulated by activation of WNT signaling through accumulation of beta catenin CTNNB1 [GeneID=1499].		35/42	Arthur Liberzon	0.035593452533085	0.104686625097309	15	8.66666666666667	9	0.0120081015122172	2	2.93272326169216	-2.93272326169216	-1	7.54140790688487	15	2	9
-5	M5953	HALLMARK_KRAS_SIGNALING_UP	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_KRAS_SIGNALING_UP.html	Genes up-regulated by KRAS activation.		155/200	Arthur Liberzon	0.00020119611514047	0.00111775619522483	10	11.3333333333333	9	6.70698699803809e-05	1	3.44648124390937	-3.40209324332434	-1	26.7406537848717	9	24	1
--- a/test-data/ranked-h-gene-sets-IL13Ant-IL13_batch_all.txt	Thu Feb 15 02:34:59 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-Rank	ID	GeneSet	BroadUrl	Description	PubMedID	NumGenes	Contributor	p.value	p.adj	vote.rank	avg.rank	med.rank	min.pvalue	min.rank	avg.logfc	avg.logfc.dir	direction	significance	camera	safe	gage	zscore	gsva	globaltest	ora	ssgsea	padog	plage	fry	roast
-1	M5928	HALLMARK_MYC_TARGETS_V2	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_MYC_TARGETS_V2.html	A subgroup of genes regulated by MYC - version 2 (v2).		53/58	Arthur Liberzon	2.69713922630138e-07	2.24761602191782e-06	5	9	3	2.24761629976564e-08	1	0.359329542834304	-0.36180948485806	-1	5.4326602414965	3	3	41	5	2	3	38	5	5	1	1	1
-2	M5932	HALLMARK_INFLAMMATORY_RESPONSE	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INFLAMMATORY_RESPONSE.html	Genes defining inflammatory response.		175/200	Arthur Liberzon	1.11005416363834e-12	1.85009027273056e-11	5	7	6.5	9.25045136365749e-14	1	2.30703143698757	2.37290331728617	1	66.3484191472523	4	9	1	15	1	12	2	1	1	18	11	9
-3	M5913	HALLMARK_INTERFERON_GAMMA_RESPONSE	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INTERFERON_GAMMA_RESPONSE.html	Genes up-regulated in response to IFNG [GeneID=3458].		181/200	Arthur Liberzon	1.18424540125318e-21	2.96061350313295e-20	5	11.75	7	9.86871167710982e-23	2	0.883356836238503	1.02316711345591	1	46.2225133313255	2	31	7	2	5	17	30	7	22	3	7	8
-4	M5890	HALLMARK_TNFA_SIGNALING_VIA_NFKB	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_TNFA_SIGNALING_VIA_NFKB.html	Genes regulated by NF-kB in response to TNF [GeneID=7124].		181/200	Arthur Liberzon	2.03460035619483e-07	2.03460035619483e-06	5	10.5833333333333	9	1.69550045493871e-08	2	2.41712304125514	2.41712304125514	1	36.8602254948149	5	8	3	13	3	26	19	2	2	26	10	10
-5	M5895	HALLMARK_WNT_BETA_CATENIN_SIGNALING	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_WNT_BETA_CATENIN_SIGNALING.html	Genes up-regulated by activation of WNT signaling through accumulation of beta catenin CTNNB1 [GeneID=1499].		35/42	Arthur Liberzon	0.00629753896190303	0.017493163783064	10	12.25	9.5	0.000526315789473684	2	2.93272326169216	-2.93272326169216	-1	13.8032973517574	15	5	32	21	10	2	9	9	6	7	15	16
--- a/test-data/ranked-h-gene-sets-compare.txt	Thu Feb 15 02:34:59 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-Rank	ID	GeneSet	BroadUrl	Description	PubMedID	NumGenes	Contributor	p.value	p.adj	vote.rank	avg.rank	med.rank	min.pvalue	min.rank	avg.logfc	avg.logfc.dir	direction	significance	camera	globaltest	ora
-1	M5890	HALLMARK_TNFA_SIGNALING_VIA_NFKB	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_TNFA_SIGNALING_VIA_NFKB.html	Genes regulated by NF-kB in response to TNF [GeneID=7124].		181/200	Arthur Liberzon	3.69843944783645e-09	6.16406574639408e-08	5	10	3	1.23281315079864e-09	1	0.588637134386457	-0.680899745245093	-1	20.6439242624481	3	26	1
-2	M5895	HALLMARK_WNT_BETA_CATENIN_SIGNALING	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_WNT_BETA_CATENIN_SIGNALING.html	Genes up-regulated by activation of WNT signaling through accumulation of beta catenin CTNNB1 [GeneID=1499].		35/42	Arthur Liberzon	0.0440661300653943	0.169485115636132	5	6.66666666666667	5	0.014909910604653	2	0.412052814817513	0.432421220391424	1	1.53878145448751	13	2	5
-3	M5928	HALLMARK_MYC_TARGETS_V2	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_MYC_TARGETS_V2.html	A subgroup of genes regulated by MYC - version 2 (v2).		53/58	Arthur Liberzon	4.79647602359615e-07	4.79647602359616e-06	5	12.3333333333333	5	1.59882559682303e-07	3	0.342550383131631	0.340902674013207	1	8.85877098589516	5	3	29
-4	M5903	HALLMARK_NOTCH_SIGNALING	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_NOTCH_SIGNALING.html	Genes up-regulated by activation of Notch signaling.		28/32	Arthur Liberzon	0.271	0.586487804394038	40	16.6666666666667	11	0.1	1	0.623573196381421	0.703516699577552	1	0.69638194408607	38	1	11
-5	M5932	HALLMARK_INFLAMMATORY_RESPONSE	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INFLAMMATORY_RESPONSE.html	Genes defining inflammatory response.		175/200	Arthur Liberzon	2.56397784509287e-08	3.20497230636609e-07	5	15.6666666666667	12	8.54659289002046e-09	4	0.724979825102502	-0.798585365579773	-1	22.9015712966849	4	12	31
--- a/test-data/ranked-kegg-gene-sets-IL13Ant-IL13.txt	Thu Feb 15 02:34:59 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-Rank	ID	GeneSet	NumGenes	Type	p.value	p.adj	vote.rank	avg.rank	med.rank	min.pvalue	min.rank	avg.logfc	avg.logfc.dir	direction	significance	camera	globaltest	ora
-1	hsa00290	Valine, leucine and isoleucine biosynthesis	4/4	Metabolism	5.16192312928897e-05	0.00454249235377429	5	10	3	1.72067065000137e-05	1	1.97581010580874	2.54323543396097	1	100	1	3	26
-2	hsa00030	Pentose phosphate pathway	25/30	Metabolism	0.000897710278839886	0.02633283484597	5	15.6666666666667	5	0.000299326346935758	3	0.349366309696153	-0.380844101339739	-1	11.9206774457994	3	39	5
-3	hsa00020	Citrate cycle (TCA cycle)	27/30	Metabolism	0.0136940769984121	0.150634846982533	10	28.3333333333333	8	0.00458568873048893	4	0.240049000819749	-0.223354655613846	-1	4.26220576401628	8	73	4
-4	hsa00514	Other types of O-glycan biosynthesis	20/22	Metabolism	0.0686977107861341	0.431814182084272	15	24	14	0.0234445903132394	14	0.33942901732311	-0.368372028499622	-1	2.67325954506273	14	14	44
-5	hsa00120	Primary bile acid biosynthesis	9/17	Metabolism	0.059624376308086	0.431814182084272	15	15.3333333333333	15	0.0202834278979437	12	0.734186992108813	0.887438287891993	1	5.78363255759184	12	19	15