# HG changeset patch # User iuc # Date 1548718746 18000 # Node ID ba2111ae6eb40bf49a98a45ab5219497ee8582bf # Parent 73281fbdf6c1266d750e6f24c50ae9da75351706 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/egsea commit ee9b940a5ffa6c52e42a89e55c0f5bdda0a386d5 diff -r 73281fbdf6c1 -r ba2111ae6eb4 egsea.R --- a/egsea.R Thu Feb 15 02:34:59 2018 -0500 +++ b/egsea.R Mon Jan 28 18:39:06 2019 -0500 @@ -98,14 +98,19 @@ } else { # Process the single count matrix - counts <- read.table(args$matrixPath, header=TRUE, sep="\t", stringsAsFactors=FALSE) + counts <- read.table(args$matrixPath, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names=FALSE) row.names(counts) <- counts[, 1] counts <- counts[ , -1] countsRows <- nrow(counts) # Process factors if (is.null(args$factInput)) { - factorData <- read.table(args$factFile, header=TRUE, sep="\t") + factorData <- read.table(args$factFile, header=TRUE, sep="\t", strip.white=TRUE) + # check samples names match + if(!any(factorData[, 1] %in% colnames(counts))) + stop("Sample IDs in factors file and count matrix don't match") + # order samples as in counts matrix + factorData <- factorData[match(colnames(counts), factorData[, 1]), ] factors <- factorData[, -1, drop=FALSE] } else { factors <- unlist(strsplit(args$factInput, "|", fixed=TRUE)) diff -r 73281fbdf6c1 -r ba2111ae6eb4 egsea.xml --- a/egsea.xml Thu Feb 15 02:34:59 2018 -0500 +++ b/egsea.xml Mon Jan 28 18:39:06 2019 -0500 @@ -1,9 +1,9 @@ - + easy and efficient ensemble gene set testing - bioconductor-egsea - r-optparse - r-rjson + bioconductor-egsea + r-optparse + r-rjson r-statmod @@ -264,7 +264,7 @@ - + @@ -277,7 +277,12 @@ - + + + + + + @@ -286,7 +291,7 @@ - + @@ -303,12 +308,21 @@ - + + + + + + - + + + + + - + @@ -322,13 +336,22 @@ - - - + + + + + + + + + + + + - + @@ -339,11 +362,16 @@ - + + + + + + - + @@ -355,11 +383,16 @@ - + + + + + + - + @@ -372,8 +405,51 @@ - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -409,11 +485,23 @@ **Inputs** -**Counts Matrix** +**Counts Data** + +This tool requires a counts matrix (counts table) containing the raw RNA-seq read counts. The counts data can either be input as separate counts files (one sample per file) or a single count matrix (one sample per column). The rows correspond to genes, and columns correspond to the counts for the samples. Values must be tab separated, with the first row containing the sample/column labels. The first column must contain Entrez Gene IDs that are unique (not repeated) within the counts file. Entrez IDs can be obtained from the **annotateMyIDs** Galaxy tool. Genes with low counts should be removed, such as in the filtered counts matrix that can be output from the **limma** tool. + +Example - **Separate Count Files**: -This tool requires a counts matrix (counts table) containing the raw RNA-seq read counts. The first column must contain Entrez Gene IDs that are unique (not repeated) within the counts file. Entrez IDs can be obtained from the **annotateMyIDs** Galaxy tool. + =============== ========== + EntrezID **WT1** + =============== ========== + 1 71 + 1000 3 + 10000 2310 + 100009605 3 + 100009613 9 + =============== ========== -Example: +Example - **Single Count Matrix**: =============== ========== ========== ========== ========= ========= ========= EntrezID **WT1** **WT2** **WT3** **Mut1** **Mut2** **Mut3** @@ -427,7 +515,7 @@ **Factor Information** -Enter factor names and groups in the tool form, or provide a tab-separated file that has the samples in the same order as listed in the columns of the counts matrix. The second column should contain the primary factor levels (e.g. WT, Mut) with optional additional columns for any secondary factors e.g Batch. +Enter factor names and groups in the tool form, or provide a tab-separated file that has the names of the samples in the first column and one header row. The sample names must be the same as the names in the columns of the count matrix. The second column should contain the primary factor levels (e.g. WT, Mut) with optional additional columns for any secondary factors e.g Batch. Example: @@ -444,7 +532,7 @@ *Factor Name:* The name of the experimental factor being investigated e.g. Genotype, Treatment. One factor must be entered and spaces must not be used. Optionally, additional factors can be included, these are variables that might influence your experiment e.g. Batch, Gender, Subject. If additional factors are entered, edgeR will fit an additive linear model. -*Groups:* The names of the groups for the factor. These must be entered in the same order as the samples (to which the groups correspond) are listed in the columns of the counts matrix. Spaces must not be used and if entered into the tool form above, the values should be separated by commas. +*Groups:* The names of the groups for the factor. Spaces must not be used and if entered into the tool form above, the values should be separated by commas. **Symbols Mapping file** diff -r 73281fbdf6c1 -r ba2111ae6eb4 test-data/il13.group --- a/test-data/il13.group Thu Feb 15 02:34:59 2018 -0500 +++ b/test-data/il13.group Mon Jan 28 18:39:06 2019 -0500 @@ -1,6 +1,6 @@ SampleID Treatment -IL13-1 IL13 IL13Ant-1 IL13Ant IL13-2 IL13 IL13-3 IL13 IL13Ant-2 IL13Ant +IL13-1 IL13 diff -r 73281fbdf6c1 -r ba2111ae6eb4 test-data/out_rscript.txt --- a/test-data/out_rscript.txt Thu Feb 15 02:34:59 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,206 +0,0 @@ -# Code based on (and inspired by) the Galaxy limma-voom/edgeR/DESeq2 wrappers - -options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) - -# we need that to not crash galaxy with an UTF8 error on German LC settings. -loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") - -suppressPackageStartupMessages({ - library(EGSEA) - library(limma) - library(edgeR) - library(optparse) -}) - - -## Function Declaration - -sanitiseEquation <- function(equation) { - equation <- gsub(" *[+] *", "+", equation) - equation <- gsub(" *[-] *", "-", equation) - equation <- gsub(" *[/] *", "/", equation) - equation <- gsub(" *[*] *", "*", equation) - equation <- gsub("^\\s+|\\s+$", "", equation) - return(equation) -} - -# Function to sanitise group information -sanitiseGroups <- function(string) { - string <- gsub(" *[,] *", ",", string) - string <- gsub("^\\s+|\\s+$", "", string) - return(string) -} - -# Generating design information -pasteListName <- function(string) { - return(paste0("factors$", string)) -} - -## Input Processing - -option_list <- list( - make_option(c("-threads", "--threads"), default=2, type="integer", help="Number of threads for egsea"), - make_option(c("-filesPath", "--filesPath"), type="character", help="JSON list object if multiple files input"), - make_option(c("-matrixPath", "--matrixPath"), type="character", help="Path to count matrix"), - make_option(c("-factFile", "--factFile"), type="character", help="Path to factor information file"), - make_option(c("-factInput", "--factInput"), type="character", help="String containing factors if manually input"), - make_option(c("-contrastData", "--contrastData"), type="character", help="Contrasts of Interest (Groups to compare)"), - make_option(c("-genes", "--genes"), type="character", help="Path to genes file"), - make_option(c("-species", "--species"), type="character"), - make_option(c("-base_methods", "--base_methods"), type="character", help="Gene set testing methods"), - make_option(c("-msigdb", "--msigdb"), type="character", help="MSigDB Gene Set Collections"), - make_option(c("-keggdb", "--keggdb"), type="character", help="KEGG Pathways"), - make_option(c("-keggupdated", "--keggupdated"), type="logical", help="Use updated KEGG"), - make_option(c("-gsdb", "--gsdb"), type="character", help = "GeneSetDB Gene Sets"), - make_option(c("-display_top", "--display_top"), type="integer", help = "Number of top Gene Sets to display"), - make_option(c("-min_size", "--min_size"), type="integer", help = "Minimum Size of Gene Set"), - make_option(c("-fdr_cutoff", "--fdr_cutoff"), type="double", help = "FDR cutoff"), - make_option(c("-combine_method", "--combine_method"), type="character", help="Method to use to combine the p-values"), - make_option(c("-sort_method", "--sort_method"), type="character", help="Method to sort the results"), - make_option(c("-rdaOpt", "--rdaOpt"), type="character", help="Output RData file") - ) - -parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) -args = parse_args(parser) - - -## Read in Files - -if (!is.null(args$filesPath)) { - # Process the separate count files (adapted from DESeq2 wrapper) - library("rjson") - parser <- newJSONParser() - parser$addData(args$filesPath) - factorList <- parser$getObject() - factors <- sapply(factorList, function(x) x[[1]]) - filenamesIn <- unname(unlist(factorList[[1]][[2]])) - sampleTable <- data.frame(sample=basename(filenamesIn), - filename=filenamesIn, - row.names=filenamesIn, - stringsAsFactors=FALSE) - for (factor in factorList) { - factorName <- factor[[1]] - sampleTable[[factorName]] <- character(nrow(sampleTable)) - lvls <- sapply(factor[[2]], function(x) names(x)) - for (i in seq_along(factor[[2]])) { - files <- factor[[2]][[i]][[1]] - sampleTable[files,factorName] <- lvls[i] - } - sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls) - } - rownames(sampleTable) <- sampleTable$sample - rem <- c("sample","filename") - factors <- sampleTable[, !(names(sampleTable) %in% rem), drop=FALSE] - - #read in count files and create single table - countfiles <- lapply(sampleTable$filename, function(x){read.delim(x, row.names=1)}) - counts <- do.call("cbind", countfiles) - -} else { - # Process the single count matrix - counts <- read.table(args$matrixPath, header=TRUE, sep="\t", stringsAsFactors=FALSE) - row.names(counts) <- counts[, 1] - counts <- counts[ , -1] - countsRows <- nrow(counts) - - # Process factors - if (is.null(args$factInput)) { - factorData <- read.table(args$factFile, header=TRUE, sep="\t") - factors <- factorData[, -1, drop=FALSE] - } else { - factors <- unlist(strsplit(args$factInput, "|", fixed=TRUE)) - factorData <- list() - for (fact in factors) { - newFact <- unlist(strsplit(fact, split="::")) - factorData <- rbind(factorData, newFact) - } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor. - - # Set the row names to be the name of the factor and delete first row - row.names(factorData) <- factorData[, 1] - factorData <- factorData[, -1] - factorData <- sapply(factorData, sanitiseGroups) - factorData <- sapply(factorData, strsplit, split=",") - factorData <- sapply(factorData, make.names) - # Transform factor data into data frame of R factor objects - factors <- data.frame(factorData) - } -} - -# Create a DGEList object -counts <- DGEList(counts) - -# Set group to be the Primary Factor input -group <- factors[, 1, drop=FALSE] - -# Split up contrasts separated by comma into a vector then sanitise -contrastData <- unlist(strsplit(args$contrastData, split=",")) -contrastData <- sanitiseEquation(contrastData) -contrastData <- gsub(" ", ".", contrastData, fixed=TRUE) - -# Creating design -row.names(factors) <- colnames(counts) -factorList <- sapply(names(factors), pasteListName) - -formula <- "~0" -for (i in 1:length(factorList)) { - formula <- paste(formula, factorList[i], sep="+") -} -formula <- formula(formula) - -design <- model.matrix(formula) - -for (i in 1:length(factorList)) { - colnames(design) <- gsub(factorList[i], "", colnames(design), fixed=TRUE) -} - -## Generate Contrasts information -contrasts <- makeContrasts(contrasts=contrastData, levels=design) - - -## Add Gene Symbol information - -genes <- read.table(args$genes, sep='\t', header=TRUE) - - -## Set Gene Set Testing Methods - -base_methods <- unlist(strsplit(args$base_methods, ",")) - - -## Set Gene Sets - -if (args$msigdb != "None") { - msigdb <- unlist(strsplit(args$msigdb, ",")) -} else { - msigdb <- "none" -} - -if (args$keggdb != "None") { - keggdb <- unlist(strsplit(args$keggdb, ",")) - kegg_all <- c("Metabolism"="keggmet", "Signaling"="keggsig", "Disease"="keggdis") - kegg_exclude <- names(kegg_all[!(kegg_all %in% keggdb)]) -} else { - kegg_exclude <- "all" -} - -if (args$gsdb != "None") { - gsdb <- unlist(strsplit(args$gsdb, ",")) -} else { - gsdb <- "none" -} - -## Index gene sets - -gs.annots <- buildIdx(entrezIDs=rownames(counts), species=args$species, msigdb.gsets=msigdb, gsdb.gsets=gsdb, kegg.exclude=kegg_exclude, kegg.updated=args$keggupdated) - - -## Run egsea.cnt - -gsa <- egsea.cnt(counts=counts, group=group, design=design, contrasts=contrasts, gs.annots=gs.annots, symbolsMap=genes, baseGSEAs=base_methods, minSize=args$min_size, display.top=args$display_top, combineMethod=args$combine_method, sort.by=args$sort_method, report.dir='./report_dir', fdr.cutoff=args$fdr_cutoff, num.threads=args$threads, report=TRUE) - - -## Output RData file - -if (!is.null(args$rdaOpt)) { - save.image(file = "EGSEA_analysis.RData") -} \ No newline at end of file diff -r 73281fbdf6c1 -r ba2111ae6eb4 test-data/ranked-h-gene-sets-IL13-IL13Ant.txt --- a/test-data/ranked-h-gene-sets-IL13-IL13Ant.txt Thu Feb 15 02:34:59 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -Rank ID GeneSet BroadUrl Description PubMedID NumGenes Contributor p.value p.adj vote.rank avg.rank med.rank min.pvalue min.rank avg.logfc avg.logfc.dir direction significance camera globaltest ora -1 M5890 HALLMARK_TNFA_SIGNALING_VIA_NFKB http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_TNFA_SIGNALING_VIA_NFKB.html Genes regulated by NF-kB in response to TNF [GeneID=7124]. 181/200 Arthur Liberzon 3.69843944783645e-09 6.16406574639408e-08 5 10 3 1.23281315079864e-09 1 0.588637134386457 -0.680899745245093 -1 20.6439242624481 3 26 1 -2 M5895 HALLMARK_WNT_BETA_CATENIN_SIGNALING http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_WNT_BETA_CATENIN_SIGNALING.html Genes up-regulated by activation of WNT signaling through accumulation of beta catenin CTNNB1 [GeneID=1499]. 35/42 Arthur Liberzon 0.0440661300653943 0.169485115636132 5 6.66666666666667 5 0.014909910604653 2 0.412052814817513 0.432421220391424 1 1.53878145448751 13 2 5 -3 M5928 HALLMARK_MYC_TARGETS_V2 http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_MYC_TARGETS_V2.html A subgroup of genes regulated by MYC - version 2 (v2). 53/58 Arthur Liberzon 4.79647602359615e-07 4.79647602359616e-06 5 12.3333333333333 5 1.59882559682303e-07 3 0.342550383131631 0.340902674013207 1 8.85877098589516 5 3 29 -4 M5903 HALLMARK_NOTCH_SIGNALING http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_NOTCH_SIGNALING.html Genes up-regulated by activation of Notch signaling. 28/32 Arthur Liberzon 0.271 0.586487804394038 40 16.6666666666667 11 0.1 1 0.623573196381421 0.703516699577552 1 0.69638194408607 38 1 11 -5 M5932 HALLMARK_INFLAMMATORY_RESPONSE http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INFLAMMATORY_RESPONSE.html Genes defining inflammatory response. 175/200 Arthur Liberzon 2.56397784509287e-08 3.20497230636609e-07 5 15.6666666666667 12 8.54659289002046e-09 4 0.724979825102502 -0.798585365579773 -1 22.9015712966849 4 12 31 diff -r 73281fbdf6c1 -r ba2111ae6eb4 test-data/ranked-h-gene-sets-IL13Ant-IL13.txt --- a/test-data/ranked-h-gene-sets-IL13Ant-IL13.txt Thu Feb 15 02:34:59 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -Rank ID GeneSet BroadUrl Description PubMedID NumGenes Contributor p.value p.adj vote.rank avg.rank med.rank min.pvalue min.rank avg.logfc avg.logfc.dir direction significance camera globaltest ora -1 M5890 HALLMARK_TNFA_SIGNALING_VIA_NFKB http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_TNFA_SIGNALING_VIA_NFKB.html Genes regulated by NF-kB in response to TNF [GeneID=7124]. 181/200 Arthur Liberzon 3.69843944783645e-09 6.16406574639408e-08 5 10 3 1.23281315079864e-09 1 0.588637134386457 0.680899745245093 1 20.6439242624481 3 26 1 -2 M5895 HALLMARK_WNT_BETA_CATENIN_SIGNALING http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_WNT_BETA_CATENIN_SIGNALING.html Genes up-regulated by activation of WNT signaling through accumulation of beta catenin CTNNB1 [GeneID=1499]. 35/42 Arthur Liberzon 0.0440661300653943 0.169485115636132 5 6.66666666666667 5 0.014909910604653 2 0.412052814817513 -0.432421220391424 -1 1.53878145448751 13 2 5 -3 M5928 HALLMARK_MYC_TARGETS_V2 http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_MYC_TARGETS_V2.html A subgroup of genes regulated by MYC - version 2 (v2). 53/58 Arthur Liberzon 4.79647602359615e-07 4.79647602359616e-06 5 12.3333333333333 5 1.59882559682303e-07 3 0.342550383131631 -0.340902674013207 -1 8.85877098589516 5 3 29 -4 M5903 HALLMARK_NOTCH_SIGNALING http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_NOTCH_SIGNALING.html Genes up-regulated by activation of Notch signaling. 28/32 Arthur Liberzon 0.271 0.586487804394038 40 16.6666666666667 11 0.1 1 0.623573196381421 -0.703516699577552 -1 0.69638194408607 38 1 11 -5 M5932 HALLMARK_INFLAMMATORY_RESPONSE http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INFLAMMATORY_RESPONSE.html Genes defining inflammatory response. 175/200 Arthur Liberzon 2.56397784509287e-08 3.20497230636609e-07 5 15.6666666666667 12 8.54659289002046e-09 4 0.724979825102502 0.798585365579773 1 22.9015712966849 4 12 31 diff -r 73281fbdf6c1 -r ba2111ae6eb4 test-data/ranked-h-gene-sets-IL13Ant-IL13_batch.txt --- a/test-data/ranked-h-gene-sets-IL13Ant-IL13_batch.txt Thu Feb 15 02:34:59 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -Rank ID GeneSet BroadUrl Description PubMedID NumGenes Contributor p.value p.adj vote.rank avg.rank med.rank min.pvalue min.rank avg.logfc avg.logfc.dir direction significance camera globaltest ora -1 M5928 HALLMARK_MYC_TARGETS_V2 http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_MYC_TARGETS_V2.html A subgroup of genes regulated by MYC - version 2 (v2). 53/58 Arthur Liberzon 6.74284874774357e-08 1.12380812462393e-06 5 14.6666666666667 3 2.24761629976564e-08 3 0.359329542834304 -0.36180948485806 -1 5.6036400577191 3 3 38 -2 M5932 HALLMARK_INFLAMMATORY_RESPONSE http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INFLAMMATORY_RESPONSE.html Genes defining inflammatory response. 175/200 Arthur Liberzon 1.06165133122067e-07 1.32706416402584e-06 5 6 4 3.5388378959693e-08 2 2.30703143698757 2.37290331728617 1 35.6474882896303 4 12 2 -3 M5944 HALLMARK_ANGIOGENESIS http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_ANGIOGENESIS.html Genes up-regulated during formation of blood vessels (angiogenesis). 24/36 Arthur Liberzon 0.0987135364567957 0.224348946492718 30 13.3333333333333 8 0.034050809631757 4 3.55096745337827 3.55096745337827 1 6.04317533929437 28 4 8 -4 M5895 HALLMARK_WNT_BETA_CATENIN_SIGNALING http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_WNT_BETA_CATENIN_SIGNALING.html Genes up-regulated by activation of WNT signaling through accumulation of beta catenin CTNNB1 [GeneID=1499]. 35/42 Arthur Liberzon 0.035593452533085 0.104686625097309 15 8.66666666666667 9 0.0120081015122172 2 2.93272326169216 -2.93272326169216 -1 7.54140790688487 15 2 9 -5 M5953 HALLMARK_KRAS_SIGNALING_UP http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_KRAS_SIGNALING_UP.html Genes up-regulated by KRAS activation. 155/200 Arthur Liberzon 0.00020119611514047 0.00111775619522483 10 11.3333333333333 9 6.70698699803809e-05 1 3.44648124390937 -3.40209324332434 -1 26.7406537848717 9 24 1 diff -r 73281fbdf6c1 -r ba2111ae6eb4 test-data/ranked-h-gene-sets-IL13Ant-IL13_batch_all.txt --- a/test-data/ranked-h-gene-sets-IL13Ant-IL13_batch_all.txt Thu Feb 15 02:34:59 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -Rank ID GeneSet BroadUrl Description PubMedID NumGenes Contributor p.value p.adj vote.rank avg.rank med.rank min.pvalue min.rank avg.logfc avg.logfc.dir direction significance camera safe gage zscore gsva globaltest ora ssgsea padog plage fry roast -1 M5928 HALLMARK_MYC_TARGETS_V2 http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_MYC_TARGETS_V2.html A subgroup of genes regulated by MYC - version 2 (v2). 53/58 Arthur Liberzon 2.69713922630138e-07 2.24761602191782e-06 5 9 3 2.24761629976564e-08 1 0.359329542834304 -0.36180948485806 -1 5.4326602414965 3 3 41 5 2 3 38 5 5 1 1 1 -2 M5932 HALLMARK_INFLAMMATORY_RESPONSE http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INFLAMMATORY_RESPONSE.html Genes defining inflammatory response. 175/200 Arthur Liberzon 1.11005416363834e-12 1.85009027273056e-11 5 7 6.5 9.25045136365749e-14 1 2.30703143698757 2.37290331728617 1 66.3484191472523 4 9 1 15 1 12 2 1 1 18 11 9 -3 M5913 HALLMARK_INTERFERON_GAMMA_RESPONSE http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INTERFERON_GAMMA_RESPONSE.html Genes up-regulated in response to IFNG [GeneID=3458]. 181/200 Arthur Liberzon 1.18424540125318e-21 2.96061350313295e-20 5 11.75 7 9.86871167710982e-23 2 0.883356836238503 1.02316711345591 1 46.2225133313255 2 31 7 2 5 17 30 7 22 3 7 8 -4 M5890 HALLMARK_TNFA_SIGNALING_VIA_NFKB http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_TNFA_SIGNALING_VIA_NFKB.html Genes regulated by NF-kB in response to TNF [GeneID=7124]. 181/200 Arthur Liberzon 2.03460035619483e-07 2.03460035619483e-06 5 10.5833333333333 9 1.69550045493871e-08 2 2.41712304125514 2.41712304125514 1 36.8602254948149 5 8 3 13 3 26 19 2 2 26 10 10 -5 M5895 HALLMARK_WNT_BETA_CATENIN_SIGNALING http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_WNT_BETA_CATENIN_SIGNALING.html Genes up-regulated by activation of WNT signaling through accumulation of beta catenin CTNNB1 [GeneID=1499]. 35/42 Arthur Liberzon 0.00629753896190303 0.017493163783064 10 12.25 9.5 0.000526315789473684 2 2.93272326169216 -2.93272326169216 -1 13.8032973517574 15 5 32 21 10 2 9 9 6 7 15 16 diff -r 73281fbdf6c1 -r ba2111ae6eb4 test-data/ranked-h-gene-sets-compare.txt --- a/test-data/ranked-h-gene-sets-compare.txt Thu Feb 15 02:34:59 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -Rank ID GeneSet BroadUrl Description PubMedID NumGenes Contributor p.value p.adj vote.rank avg.rank med.rank min.pvalue min.rank avg.logfc avg.logfc.dir direction significance camera globaltest ora -1 M5890 HALLMARK_TNFA_SIGNALING_VIA_NFKB http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_TNFA_SIGNALING_VIA_NFKB.html Genes regulated by NF-kB in response to TNF [GeneID=7124]. 181/200 Arthur Liberzon 3.69843944783645e-09 6.16406574639408e-08 5 10 3 1.23281315079864e-09 1 0.588637134386457 -0.680899745245093 -1 20.6439242624481 3 26 1 -2 M5895 HALLMARK_WNT_BETA_CATENIN_SIGNALING http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_WNT_BETA_CATENIN_SIGNALING.html Genes up-regulated by activation of WNT signaling through accumulation of beta catenin CTNNB1 [GeneID=1499]. 35/42 Arthur Liberzon 0.0440661300653943 0.169485115636132 5 6.66666666666667 5 0.014909910604653 2 0.412052814817513 0.432421220391424 1 1.53878145448751 13 2 5 -3 M5928 HALLMARK_MYC_TARGETS_V2 http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_MYC_TARGETS_V2.html A subgroup of genes regulated by MYC - version 2 (v2). 53/58 Arthur Liberzon 4.79647602359615e-07 4.79647602359616e-06 5 12.3333333333333 5 1.59882559682303e-07 3 0.342550383131631 0.340902674013207 1 8.85877098589516 5 3 29 -4 M5903 HALLMARK_NOTCH_SIGNALING http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_NOTCH_SIGNALING.html Genes up-regulated by activation of Notch signaling. 28/32 Arthur Liberzon 0.271 0.586487804394038 40 16.6666666666667 11 0.1 1 0.623573196381421 0.703516699577552 1 0.69638194408607 38 1 11 -5 M5932 HALLMARK_INFLAMMATORY_RESPONSE http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INFLAMMATORY_RESPONSE.html Genes defining inflammatory response. 175/200 Arthur Liberzon 2.56397784509287e-08 3.20497230636609e-07 5 15.6666666666667 12 8.54659289002046e-09 4 0.724979825102502 -0.798585365579773 -1 22.9015712966849 4 12 31 diff -r 73281fbdf6c1 -r ba2111ae6eb4 test-data/ranked-kegg-gene-sets-IL13Ant-IL13.txt --- a/test-data/ranked-kegg-gene-sets-IL13Ant-IL13.txt Thu Feb 15 02:34:59 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -Rank ID GeneSet NumGenes Type p.value p.adj vote.rank avg.rank med.rank min.pvalue min.rank avg.logfc avg.logfc.dir direction significance camera globaltest ora -1 hsa00290 Valine, leucine and isoleucine biosynthesis 4/4 Metabolism 5.16192312928897e-05 0.00454249235377429 5 10 3 1.72067065000137e-05 1 1.97581010580874 2.54323543396097 1 100 1 3 26 -2 hsa00030 Pentose phosphate pathway 25/30 Metabolism 0.000897710278839886 0.02633283484597 5 15.6666666666667 5 0.000299326346935758 3 0.349366309696153 -0.380844101339739 -1 11.9206774457994 3 39 5 -3 hsa00020 Citrate cycle (TCA cycle) 27/30 Metabolism 0.0136940769984121 0.150634846982533 10 28.3333333333333 8 0.00458568873048893 4 0.240049000819749 -0.223354655613846 -1 4.26220576401628 8 73 4 -4 hsa00514 Other types of O-glycan biosynthesis 20/22 Metabolism 0.0686977107861341 0.431814182084272 15 24 14 0.0234445903132394 14 0.33942901732311 -0.368372028499622 -1 2.67325954506273 14 14 44 -5 hsa00120 Primary bile acid biosynthesis 9/17 Metabolism 0.059624376308086 0.431814182084272 15 15.3333333333333 15 0.0202834278979437 12 0.734186992108813 0.887438287891993 1 5.78363255759184 12 19 15