changeset 1:73281fbdf6c1 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/egsea commit 225518a08941e7ef8e5c402e3696ec5fa6e592a0
author iuc
date Thu, 15 Feb 2018 02:34:59 -0500
parents a8a083193440
children ba2111ae6eb4
files egsea.R egsea.xml test-data/out_rscript.txt test-data/ranked-h-gene-sets-IL13Ant-IL13_batch_all.txt test-data/ranked-kegg-gene-sets-IL13Ant-IL13.txt
diffstat 5 files changed, 62 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/egsea.R	Thu Jan 25 02:23:23 2018 -0500
+++ b/egsea.R	Thu Feb 15 02:34:59 2018 -0500
@@ -50,13 +50,14 @@
     make_option(c("-base_methods", "--base_methods"), type="character", help="Gene set testing methods"),
     make_option(c("-msigdb", "--msigdb"), type="character", help="MSigDB Gene Set Collections"),
     make_option(c("-keggdb", "--keggdb"), type="character", help="KEGG Pathways"),
+    make_option(c("-keggupdated", "--keggupdated"), type="logical", help="Use updated KEGG"),
     make_option(c("-gsdb", "--gsdb"), type="character", help = "GeneSetDB Gene Sets"),
     make_option(c("-display_top", "--display_top"), type="integer", help = "Number of top Gene Sets to display"),
     make_option(c("-min_size", "--min_size"), type="integer", help = "Minimum Size of Gene Set"),
     make_option(c("-fdr_cutoff", "--fdr_cutoff"), type="double", help = "FDR cutoff"),
     make_option(c("-combine_method", "--combine_method"), type="character", help="Method to use to combine the p-values"),
     make_option(c("-sort_method", "--sort_method"), type="character", help="Method to sort the results"),
-    make_option(c("-rdata", "--rdaOpt"), type="character", help="Output RData file")
+    make_option(c("-rdaOpt", "--rdaOpt"), type="character", help="Output RData file")
     )
 
 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
@@ -188,10 +189,9 @@
     gsdb <- "none"
 }
 
-
 ## Index gene sets
 
-gs.annots <- buildIdx(entrezIDs=rownames(counts), species=args$species, msigdb.gsets=msigdb, gsdb.gsets=gsdb, kegg.exclude=kegg_exclude)
+gs.annots <- buildIdx(entrezIDs=rownames(counts), species=args$species, msigdb.gsets=msigdb, gsdb.gsets=gsdb, kegg.exclude=kegg_exclude, kegg.updated=args$keggupdated)
 
 
 ## Run egsea.cnt
@@ -201,6 +201,6 @@
 
 ## Output RData file
 
-if (!is.null(args$rdata)) {
+if (!is.null(args$rdaOpt)) {
   save.image(file = "EGSEA_analysis.RData")
 }
\ No newline at end of file
--- a/egsea.xml	Thu Jan 25 02:23:23 2018 -0500
+++ b/egsea.xml	Thu Feb 15 02:34:59 2018 -0500
@@ -1,12 +1,14 @@
-<tool id="egsea" name="EGSEA" version="1.6.0.0">
+<tool id="egsea" name="EGSEA" version="1.6.0.1">
     <description> easy and efficient ensemble gene set testing</description>
     <requirements>
         <requirement type="package" version="1.6.0">bioconductor-egsea</requirement>
         <requirement type="package" version="1.4.4">r-optparse</requirement>
         <requirement type="package" version="0.2.15">r-rjson</requirement>
+        <!--statmod is required for fry-->
+        <requirement type="package" version="1.4.30">r-statmod</requirement>
     </requirements>
     <version_command><![CDATA[
-echo $(R --version | grep version | grep -v GNU)", EGSEA version" $(R --vanilla --slave -e "library(EGSEA); cat(sessionInfo()\$otherPkgs\$EGSEA\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\$otherPkgs\$rjson\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+echo $(R --version | grep version | grep -v GNU)", EGSEA version" $(R --vanilla --slave -e "library(EGSEA); cat(sessionInfo()\$otherPkgs\$EGSEA\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\$otherPkgs\$rjson\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", statmod version" $(R --vanilla --slave -e "library(statmod); cat(sessionInfo()\$otherPkgs\$statmod\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
     ]]></version_command>
     <command detect_errors="exit_code"><![CDATA[
 ## EGSEA requires at least 2 threads
@@ -57,6 +59,7 @@
 --base_methods $base_methods
 --msigdb $msigdb.msigdb_gsets
 --keggdb $keggdb.keggdb_gsets
+--keggupdated $keggdb.kegg_updated
 --gsdb $gsdb.gsdb_gsets
 
 --display_top $advanced.display_top
@@ -186,6 +189,7 @@
                 <option value="keggsig">Signalling pathways</option>
                 <option value="keggdis">Disease pathways</option>
             </param>
+            <param name="kegg_updated" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Download KEGG pathways?" help="Select Yes if you want to download the most recent KEGG pathways, see the Help section below. Default: No"/>
         </section>
 
         <section name="gsdb" title="GeneSetDB Gene Sets" expanded="True">
@@ -259,7 +263,7 @@
     </outputs>
 
     <tests>
-         <!-- Ensure report is output -->
+        <!-- Ensure report is output -->
         <test expect_num_outputs="1">
             <param name="non_commercial_use" value="True"/>
             <param name="format" value="matrix" />
@@ -338,6 +342,40 @@
                 <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13_batch.txt"/>
             </output_collection>
         </test>
+        <!-- Ensure all gene set methods work -->
+        <test expect_num_outputs="1">
+            <param name="non_commercial_use" value="True"/>
+            <param name="format" value="matrix"/>
+            <param name="counts" value="il13.counts"/>
+            <param name="genes" value="il13.genes"/>
+            <param name="ffile" value="yes"/>
+            <param name="finfo" value="il13.group_batch"/>
+            <repeat name="rep_contrast">
+                <param name="contrast" value="IL13Ant-IL13"/>
+            </repeat>
+            <param name="base_methods" value="camera,safe,gage,zscore,gsva,globaltest,ora,ssgsea,padog,plage,fry,roast"/>
+            <output_collection name="outTables" count="1">
+                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13_batch_all.txt"/>
+            </output_collection>
+        </test>
+        <!-- Ensure KEGG updated works -->
+        <test expect_num_outputs="1">
+            <param name="non_commercial_use" value="True"/>
+            <param name="format" value="matrix"/>
+            <param name="counts" value="il13.counts"/>
+            <param name="genes" value="il13.genes"/>
+            <param name="ffile" value="yes"/>
+            <param name="finfo" value="il13.group_batch"/>
+            <repeat name="rep_contrast">
+                <param name="contrast" value="IL13Ant-IL13"/>
+            </repeat>
+            <param name="keggdb_gsets" value="keggmet"/>
+            <param name="kegg_updated" value="True"/>
+            <output_collection name="outTables" count="2">
+                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13_batch.txt"/>
+                <element name="ranked-kegg-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-kegg-gene-sets-IL13Ant-IL13.txt"/>
+            </output_collection>
+        </test>
     </tests>
 
     <help><![CDATA[
@@ -528,7 +566,7 @@
 
 **KEGG Pathways**
 
-Obtained by EGSEAdata from the GAGE_ Bioconductor package using the gage function kegg.gsets(). The Pathview_ Bioconductor package is used to visualize the expression data mapped onto the KEGG pathway graphs. Pathview has a GPLv3 licence which means users are required to formally cite the original `Pathview paper`_ (not just mention it) in publications or products. GAGE/Pathview divide the KEGG pathways into 3 categories: Signaling, Metabolism and Disease, listed in this file at the `Pathview website here`_.
+Obtained by EGSEAdata from the GAGE_ Bioconductor package using the gage function kegg.gsets(). The Pathview_ Bioconductor package is used to visualize the expression data mapped onto the KEGG pathway graphs. Pathview has a GPLv3 licence which means users are required to formally cite the original `Pathview paper`_ (not just mention it) in publications or products. GAGE/Pathview divide the KEGG pathways into 3 categories: Signaling, Metabolism and Disease, listed in this file at the `Pathview website here`_. You can choose whether to download the most recent KEGG pathways by selecting the ``Download KEGG pathways`` option in the tool form above. Note that downloading the most recent pathways may affect reproducibility, because you cannot choose which versions of the pathways are used.
 
 **Signaling**
 
--- a/test-data/out_rscript.txt	Thu Jan 25 02:23:23 2018 -0500
+++ b/test-data/out_rscript.txt	Thu Feb 15 02:34:59 2018 -0500
@@ -50,13 +50,14 @@
     make_option(c("-base_methods", "--base_methods"), type="character", help="Gene set testing methods"),
     make_option(c("-msigdb", "--msigdb"), type="character", help="MSigDB Gene Set Collections"),
     make_option(c("-keggdb", "--keggdb"), type="character", help="KEGG Pathways"),
+    make_option(c("-keggupdated", "--keggupdated"), type="logical", help="Use updated KEGG"),
     make_option(c("-gsdb", "--gsdb"), type="character", help = "GeneSetDB Gene Sets"),
     make_option(c("-display_top", "--display_top"), type="integer", help = "Number of top Gene Sets to display"),
     make_option(c("-min_size", "--min_size"), type="integer", help = "Minimum Size of Gene Set"),
     make_option(c("-fdr_cutoff", "--fdr_cutoff"), type="double", help = "FDR cutoff"),
     make_option(c("-combine_method", "--combine_method"), type="character", help="Method to use to combine the p-values"),
     make_option(c("-sort_method", "--sort_method"), type="character", help="Method to sort the results"),
-    make_option(c("-rdata", "--rdaOpt"), type="character", help="Output RData file")
+    make_option(c("-rdaOpt", "--rdaOpt"), type="character", help="Output RData file")
     )
 
 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
@@ -188,10 +189,9 @@
     gsdb <- "none"
 }
 
-
 ## Index gene sets
 
-gs.annots <- buildIdx(entrezIDs=rownames(counts), species=args$species, msigdb.gsets=msigdb, gsdb.gsets=gsdb, kegg.exclude=kegg_exclude)
+gs.annots <- buildIdx(entrezIDs=rownames(counts), species=args$species, msigdb.gsets=msigdb, gsdb.gsets=gsdb, kegg.exclude=kegg_exclude, kegg.updated=args$keggupdated)
 
 
 ## Run egsea.cnt
@@ -201,6 +201,6 @@
 
 ## Output RData file
 
-if (!is.null(args$rdata)) {
+if (!is.null(args$rdaOpt)) {
   save.image(file = "EGSEA_analysis.RData")
 }
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ranked-h-gene-sets-IL13Ant-IL13_batch_all.txt	Thu Feb 15 02:34:59 2018 -0500
@@ -0,0 +1,6 @@
+Rank	ID	GeneSet	BroadUrl	Description	PubMedID	NumGenes	Contributor	p.value	p.adj	vote.rank	avg.rank	med.rank	min.pvalue	min.rank	avg.logfc	avg.logfc.dir	direction	significance	camera	safe	gage	zscore	gsva	globaltest	ora	ssgsea	padog	plage	fry	roast
+1	M5928	HALLMARK_MYC_TARGETS_V2	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_MYC_TARGETS_V2.html	A subgroup of genes regulated by MYC - version 2 (v2).		53/58	Arthur Liberzon	2.69713922630138e-07	2.24761602191782e-06	5	9	3	2.24761629976564e-08	1	0.359329542834304	-0.36180948485806	-1	5.4326602414965	3	3	41	5	2	3	38	5	5	1	1	1
+2	M5932	HALLMARK_INFLAMMATORY_RESPONSE	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INFLAMMATORY_RESPONSE.html	Genes defining inflammatory response.		175/200	Arthur Liberzon	1.11005416363834e-12	1.85009027273056e-11	5	7	6.5	9.25045136365749e-14	1	2.30703143698757	2.37290331728617	1	66.3484191472523	4	9	1	15	1	12	2	1	1	18	11	9
+3	M5913	HALLMARK_INTERFERON_GAMMA_RESPONSE	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_INTERFERON_GAMMA_RESPONSE.html	Genes up-regulated in response to IFNG [GeneID=3458].		181/200	Arthur Liberzon	1.18424540125318e-21	2.96061350313295e-20	5	11.75	7	9.86871167710982e-23	2	0.883356836238503	1.02316711345591	1	46.2225133313255	2	31	7	2	5	17	30	7	22	3	7	8
+4	M5890	HALLMARK_TNFA_SIGNALING_VIA_NFKB	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_TNFA_SIGNALING_VIA_NFKB.html	Genes regulated by NF-kB in response to TNF [GeneID=7124].		181/200	Arthur Liberzon	2.03460035619483e-07	2.03460035619483e-06	5	10.5833333333333	9	1.69550045493871e-08	2	2.41712304125514	2.41712304125514	1	36.8602254948149	5	8	3	13	3	26	19	2	2	26	10	10
+5	M5895	HALLMARK_WNT_BETA_CATENIN_SIGNALING	http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_WNT_BETA_CATENIN_SIGNALING.html	Genes up-regulated by activation of WNT signaling through accumulation of beta catenin CTNNB1 [GeneID=1499].		35/42	Arthur Liberzon	0.00629753896190303	0.017493163783064	10	12.25	9.5	0.000526315789473684	2	2.93272326169216	-2.93272326169216	-1	13.8032973517574	15	5	32	21	10	2	9	9	6	7	15	16
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ranked-kegg-gene-sets-IL13Ant-IL13.txt	Thu Feb 15 02:34:59 2018 -0500
@@ -0,0 +1,6 @@
+Rank	ID	GeneSet	NumGenes	Type	p.value	p.adj	vote.rank	avg.rank	med.rank	min.pvalue	min.rank	avg.logfc	avg.logfc.dir	direction	significance	camera	globaltest	ora
+1	hsa00290	Valine, leucine and isoleucine biosynthesis	4/4	Metabolism	5.16192312928897e-05	0.00454249235377429	5	10	3	1.72067065000137e-05	1	1.97581010580874	2.54323543396097	1	100	1	3	26
+2	hsa00030	Pentose phosphate pathway	25/30	Metabolism	0.000897710278839886	0.02633283484597	5	15.6666666666667	5	0.000299326346935758	3	0.349366309696153	-0.380844101339739	-1	11.9206774457994	3	39	5
+3	hsa00020	Citrate cycle (TCA cycle)	27/30	Metabolism	0.0136940769984121	0.150634846982533	10	28.3333333333333	8	0.00458568873048893	4	0.240049000819749	-0.223354655613846	-1	4.26220576401628	8	73	4
+4	hsa00514	Other types of O-glycan biosynthesis	20/22	Metabolism	0.0686977107861341	0.431814182084272	15	24	14	0.0234445903132394	14	0.33942901732311	-0.368372028499622	-1	2.67325954506273	14	14	44
+5	hsa00120	Primary bile acid biosynthesis	9/17	Metabolism	0.059624376308086	0.431814182084272	15	15.3333333333333	15	0.0202834278979437	12	0.734186992108813	0.887438287891993	1	5.78363255759184	12	19	15