Mercurial > repos > iuc > egsea
changeset 1:73281fbdf6c1 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/egsea commit 225518a08941e7ef8e5c402e3696ec5fa6e592a0
author | iuc |
---|---|
date | Thu, 15 Feb 2018 02:34:59 -0500 |
parents | a8a083193440 |
children | ba2111ae6eb4 |
files | egsea.R egsea.xml test-data/out_rscript.txt test-data/ranked-h-gene-sets-IL13Ant-IL13_batch_all.txt test-data/ranked-kegg-gene-sets-IL13Ant-IL13.txt |
diffstat | 5 files changed, 62 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/egsea.R Thu Jan 25 02:23:23 2018 -0500 +++ b/egsea.R Thu Feb 15 02:34:59 2018 -0500 @@ -50,13 +50,14 @@ make_option(c("-base_methods", "--base_methods"), type="character", help="Gene set testing methods"), make_option(c("-msigdb", "--msigdb"), type="character", help="MSigDB Gene Set Collections"), make_option(c("-keggdb", "--keggdb"), type="character", help="KEGG Pathways"), + make_option(c("-keggupdated", "--keggupdated"), type="logical", help="Use updated KEGG"), make_option(c("-gsdb", "--gsdb"), type="character", help = "GeneSetDB Gene Sets"), make_option(c("-display_top", "--display_top"), type="integer", help = "Number of top Gene Sets to display"), make_option(c("-min_size", "--min_size"), type="integer", help = "Minimum Size of Gene Set"), make_option(c("-fdr_cutoff", "--fdr_cutoff"), type="double", help = "FDR cutoff"), make_option(c("-combine_method", "--combine_method"), type="character", help="Method to use to combine the p-values"), make_option(c("-sort_method", "--sort_method"), type="character", help="Method to sort the results"), - make_option(c("-rdata", "--rdaOpt"), type="character", help="Output RData file") + make_option(c("-rdaOpt", "--rdaOpt"), type="character", help="Output RData file") ) parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) @@ -188,10 +189,9 @@ gsdb <- "none" } - ## Index gene sets -gs.annots <- buildIdx(entrezIDs=rownames(counts), species=args$species, msigdb.gsets=msigdb, gsdb.gsets=gsdb, kegg.exclude=kegg_exclude) +gs.annots <- buildIdx(entrezIDs=rownames(counts), species=args$species, msigdb.gsets=msigdb, gsdb.gsets=gsdb, kegg.exclude=kegg_exclude, kegg.updated=args$keggupdated) ## Run egsea.cnt @@ -201,6 +201,6 @@ ## Output RData file -if (!is.null(args$rdata)) { +if (!is.null(args$rdaOpt)) { save.image(file = "EGSEA_analysis.RData") } \ No newline at end of file
--- a/egsea.xml Thu Jan 25 02:23:23 2018 -0500 +++ b/egsea.xml Thu Feb 15 02:34:59 2018 -0500 @@ -1,12 +1,14 @@ -<tool id="egsea" name="EGSEA" version="1.6.0.0"> +<tool id="egsea" name="EGSEA" version="1.6.0.1"> <description> easy and efficient ensemble gene set testing</description> <requirements> <requirement type="package" version="1.6.0">bioconductor-egsea</requirement> <requirement type="package" version="1.4.4">r-optparse</requirement> <requirement type="package" version="0.2.15">r-rjson</requirement> + <!--statmod is required for fry--> + <requirement type="package" version="1.4.30">r-statmod</requirement> </requirements> <version_command><![CDATA[ -echo $(R --version | grep version | grep -v GNU)", EGSEA version" $(R --vanilla --slave -e "library(EGSEA); cat(sessionInfo()\$otherPkgs\$EGSEA\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\$otherPkgs\$rjson\$Version)" 2> /dev/null | grep -v -i "WARNING: ") +echo $(R --version | grep version | grep -v GNU)", EGSEA version" $(R --vanilla --slave -e "library(EGSEA); cat(sessionInfo()\$otherPkgs\$EGSEA\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\$otherPkgs\$rjson\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", statmod version" $(R --vanilla --slave -e "library(statmod); cat(sessionInfo()\$otherPkgs\$statmod\$Version)" 2> /dev/null | grep -v -i "WARNING: ") ]]></version_command> <command detect_errors="exit_code"><![CDATA[ ## EGSEA requires at least 2 threads @@ -57,6 +59,7 @@ --base_methods $base_methods --msigdb $msigdb.msigdb_gsets --keggdb $keggdb.keggdb_gsets +--keggupdated $keggdb.kegg_updated --gsdb $gsdb.gsdb_gsets --display_top $advanced.display_top @@ -186,6 +189,7 @@ <option value="keggsig">Signalling pathways</option> <option value="keggdis">Disease pathways</option> </param> + <param name="kegg_updated" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Download KEGG pathways?" help="Select Yes if you want to download the most recent KEGG pathways, see the Help section below. Default: No"/> </section> <section name="gsdb" title="GeneSetDB Gene Sets" expanded="True"> @@ -259,7 +263,7 @@ </outputs> <tests> - <!-- Ensure report is output --> + <!-- Ensure report is output --> <test expect_num_outputs="1"> <param name="non_commercial_use" value="True"/> <param name="format" value="matrix" /> @@ -338,6 +342,40 @@ <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13_batch.txt"/> </output_collection> </test> + <!-- Ensure all gene set methods work --> + <test expect_num_outputs="1"> + <param name="non_commercial_use" value="True"/> + <param name="format" value="matrix"/> + <param name="counts" value="il13.counts"/> + <param name="genes" value="il13.genes"/> + <param name="ffile" value="yes"/> + <param name="finfo" value="il13.group_batch"/> + <repeat name="rep_contrast"> + <param name="contrast" value="IL13Ant-IL13"/> + </repeat> + <param name="base_methods" value="camera,safe,gage,zscore,gsva,globaltest,ora,ssgsea,padog,plage,fry,roast"/> + <output_collection name="outTables" count="1"> + <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13_batch_all.txt"/> + </output_collection> + </test> + <!-- Ensure KEGG updated works --> + <test expect_num_outputs="1"> + <param name="non_commercial_use" value="True"/> + <param name="format" value="matrix"/> + <param name="counts" value="il13.counts"/> + <param name="genes" value="il13.genes"/> + <param name="ffile" value="yes"/> + <param name="finfo" value="il13.group_batch"/> + <repeat name="rep_contrast"> + <param name="contrast" value="IL13Ant-IL13"/> + </repeat> + <param name="keggdb_gsets" value="keggmet"/> + <param name="kegg_updated" value="True"/> + <output_collection name="outTables" count="2"> + <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13_batch.txt"/> + <element name="ranked-kegg-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-kegg-gene-sets-IL13Ant-IL13.txt"/> + </output_collection> + </test> </tests> <help><![CDATA[ @@ -528,7 +566,7 @@ **KEGG Pathways** -Obtained by EGSEAdata from the GAGE_ Bioconductor package using the gage function kegg.gsets(). The Pathview_ Bioconductor package is used to visualize the expression data mapped onto the KEGG pathway graphs. Pathview has a GPLv3 licence which means users are required to formally cite the original `Pathview paper`_ (not just mention it) in publications or products. GAGE/Pathview divide the KEGG pathways into 3 categories: Signaling, Metabolism and Disease, listed in this file at the `Pathview website here`_. +Obtained by EGSEAdata from the GAGE_ Bioconductor package using the gage function kegg.gsets(). The Pathview_ Bioconductor package is used to visualize the expression data mapped onto the KEGG pathway graphs. Pathview has a GPLv3 licence which means users are required to formally cite the original `Pathview paper`_ (not just mention it) in publications or products. GAGE/Pathview divide the KEGG pathways into 3 categories: Signaling, Metabolism and Disease, listed in this file at the `Pathview website here`_. You can choose if you want to download the most recent KEGG pathways by selecting the ``Download KEGG pathways`` option in the tool form above. Note that downloading the most recent pathways may affect reproducibility as you can't choose what versions of pathways to use. **Signaling**
--- a/test-data/out_rscript.txt Thu Jan 25 02:23:23 2018 -0500 +++ b/test-data/out_rscript.txt Thu Feb 15 02:34:59 2018 -0500 @@ -50,13 +50,14 @@ make_option(c("-base_methods", "--base_methods"), type="character", help="Gene set testing methods"), make_option(c("-msigdb", "--msigdb"), type="character", help="MSigDB Gene Set Collections"), make_option(c("-keggdb", "--keggdb"), type="character", help="KEGG Pathways"), + make_option(c("-keggupdated", "--keggupdated"), type="logical", help="Use updated KEGG"), make_option(c("-gsdb", "--gsdb"), type="character", help = "GeneSetDB Gene Sets"), make_option(c("-display_top", "--display_top"), type="integer", help = "Number of top Gene Sets to display"), make_option(c("-min_size", "--min_size"), type="integer", help = "Minimum Size of Gene Set"), make_option(c("-fdr_cutoff", "--fdr_cutoff"), type="double", help = "FDR cutoff"), make_option(c("-combine_method", "--combine_method"), type="character", help="Method to use to combine the p-values"), make_option(c("-sort_method", "--sort_method"), type="character", help="Method to sort the results"), - make_option(c("-rdata", "--rdaOpt"), type="character", help="Output RData file") + make_option(c("-rdaOpt", "--rdaOpt"), type="character", help="Output RData file") ) parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) @@ -188,10 +189,9 @@ gsdb <- "none" } - ## Index gene sets -gs.annots <- buildIdx(entrezIDs=rownames(counts), species=args$species, msigdb.gsets=msigdb, gsdb.gsets=gsdb, kegg.exclude=kegg_exclude) +gs.annots <- buildIdx(entrezIDs=rownames(counts), species=args$species, msigdb.gsets=msigdb, gsdb.gsets=gsdb, kegg.exclude=kegg_exclude, kegg.updated=args$keggupdated) ## Run egsea.cnt @@ -201,6 +201,6 @@ ## Output RData file -if (!is.null(args$rdata)) { +if (!is.null(args$rdaOpt)) { save.image(file = "EGSEA_analysis.RData") } \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ranked-h-gene-sets-IL13Ant-IL13_batch_all.txt Thu Feb 15 02:34:59 2018 -0500 @@ -0,0 +1,6 @@ +Rank ID GeneSet BroadUrl Description PubMedID NumGenes Contributor p.value p.adj vote.rank avg.rank med.rank min.pvalue min.rank avg.logfc avg.logfc.dir direction significance camera safe gage zscore gsva globaltest ora ssgsea padog plage fry roast +1 M5928 HALLMARK_MYC_TARGETS_V2 http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_MYC_TARGETS_V2.html A subgroup of genes regulated by MYC - version 2 (v2). 53/58 Arthur Liberzon 2.69713922630138e-07 2.24761602191782e-06 5 9 3 2.24761629976564e-08 1 0.359329542834304 -0.36180948485806 -1 5.4326602414965 3 3 41 5 2 3 38 5 5 1 1 1 +2 M5932 HALLMARK_INFLAMMATORY_RESPONSE http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INFLAMMATORY_RESPONSE.html Genes defining inflammatory response. 175/200 Arthur Liberzon 1.11005416363834e-12 1.85009027273056e-11 5 7 6.5 9.25045136365749e-14 1 2.30703143698757 2.37290331728617 1 66.3484191472523 4 9 1 15 1 12 2 1 1 18 11 9 +3 M5913 HALLMARK_INTERFERON_GAMMA_RESPONSE http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INTERFERON_GAMMA_RESPONSE.html Genes up-regulated in response to IFNG [GeneID=3458]. 181/200 Arthur Liberzon 1.18424540125318e-21 2.96061350313295e-20 5 11.75 7 9.86871167710982e-23 2 0.883356836238503 1.02316711345591 1 46.2225133313255 2 31 7 2 5 17 30 7 22 3 7 8 +4 M5890 HALLMARK_TNFA_SIGNALING_VIA_NFKB http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_TNFA_SIGNALING_VIA_NFKB.html Genes regulated by NF-kB in response to TNF [GeneID=7124]. 181/200 Arthur Liberzon 2.03460035619483e-07 2.03460035619483e-06 5 10.5833333333333 9 1.69550045493871e-08 2 2.41712304125514 2.41712304125514 1 36.8602254948149 5 8 3 13 3 26 19 2 2 26 10 10 +5 M5895 HALLMARK_WNT_BETA_CATENIN_SIGNALING http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_WNT_BETA_CATENIN_SIGNALING.html Genes up-regulated by activation of WNT signaling through accumulation of beta catenin CTNNB1 [GeneID=1499]. 35/42 Arthur Liberzon 0.00629753896190303 0.017493163783064 10 12.25 9.5 0.000526315789473684 2 2.93272326169216 -2.93272326169216 -1 13.8032973517574 15 5 32 21 10 2 9 9 6 7 15 16
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ranked-kegg-gene-sets-IL13Ant-IL13.txt Thu Feb 15 02:34:59 2018 -0500 @@ -0,0 +1,6 @@ +Rank ID GeneSet NumGenes Type p.value p.adj vote.rank avg.rank med.rank min.pvalue min.rank avg.logfc avg.logfc.dir direction significance camera globaltest ora +1 hsa00290 Valine, leucine and isoleucine biosynthesis 4/4 Metabolism 5.16192312928897e-05 0.00454249235377429 5 10 3 1.72067065000137e-05 1 1.97581010580874 2.54323543396097 1 100 1 3 26 +2 hsa00030 Pentose phosphate pathway 25/30 Metabolism 0.000897710278839886 0.02633283484597 5 15.6666666666667 5 0.000299326346935758 3 0.349366309696153 -0.380844101339739 -1 11.9206774457994 3 39 5 +3 hsa00020 Citrate cycle (TCA cycle) 27/30 Metabolism 0.0136940769984121 0.150634846982533 10 28.3333333333333 8 0.00458568873048893 4 0.240049000819749 -0.223354655613846 -1 4.26220576401628 8 73 4 +4 hsa00514 Other types of O-glycan biosynthesis 20/22 Metabolism 0.0686977107861341 0.431814182084272 15 24 14 0.0234445903132394 14 0.33942901732311 -0.368372028499622 -1 2.67325954506273 14 14 44 +5 hsa00120 Primary bile acid biosynthesis 9/17 Metabolism 0.059624376308086 0.431814182084272 15 15.3333333333333 15 0.0202834278979437 12 0.734186992108813 0.887438287891993 1 5.78363255759184 12 19 15