diff egsea.xml @ 2:ba2111ae6eb4 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/egsea commit ee9b940a5ffa6c52e42a89e55c0f5bdda0a386d5
author iuc
date Mon, 28 Jan 2019 18:39:06 -0500
parents 73281fbdf6c1
children 31ea4992b948
line wrap: on
line diff
--- a/egsea.xml	Thu Feb 15 02:34:59 2018 -0500
+++ b/egsea.xml	Mon Jan 28 18:39:06 2019 -0500
@@ -1,9 +1,9 @@
-<tool id="egsea" name="EGSEA" version="1.6.0.1">
+<tool id="egsea" name="EGSEA" version="1.8.0">
     <description> easy and efficient ensemble gene set testing</description>
     <requirements>
-        <requirement type="package" version="1.6.0">bioconductor-egsea</requirement>
-        <requirement type="package" version="1.4.4">r-optparse</requirement>
-        <requirement type="package" version="0.2.15">r-rjson</requirement>
+        <requirement type="package" version="1.8.0">bioconductor-egsea</requirement>
+        <requirement type="package" version="1.6.0">r-optparse</requirement>
+        <requirement type="package" version="0.2.20">r-rjson</requirement>
         <!--statmod is required for fry-->
         <requirement type="package" version="1.4.30">r-statmod</requirement>
     </requirements>
@@ -264,7 +264,7 @@
 
     <tests>
         <!-- Ensure report is output -->
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="2">
             <param name="non_commercial_use" value="True"/>
             <param name="format" value="matrix" />
             <param name="counts" value="il13.counts"/>
@@ -277,7 +277,12 @@
                 <param name="contrast" value="IL13Ant-IL13"/>
             </repeat>
             <output_collection name="outTables" count="1">
-                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13.txt"/>
+                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*BroadUrl.*Description.*PubMedID.*NumGenes.*Contributor.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*M5890.*HALLMARK_TNFA_SIGNALING_VIA_NFKB.*181/200.*3.6" />
+                    </assert_contents>
+                </element>
             </output_collection>
             <output name="outReport">
                 <assert_contents>
@@ -286,7 +291,7 @@
             </output>
         </test>
         <!-- Ensure factors file input works and Rscript is output-->
-        <test expect_num_outputs="2">
+        <test expect_num_outputs="3">
             <param name="non_commercial_use" value="True"/>
             <param name="format" value="matrix"/>
             <param name="counts" value="il13.counts"/>
@@ -303,12 +308,21 @@
                 </assert_contents>
             </output>
             <output_collection name="outTables" count="1">
-                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13.txt"/>
+                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*BroadUrl.*Description.*PubMedID.*NumGenes.*Contributor.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*M5890.*HALLMARK_TNFA_SIGNALING_VIA_NFKB.*181/200.*3.6" />
+                    </assert_contents>
+                </element>
             </output_collection>
-            <output name="outRscript" value="out_rscript.txt"/>
+            <output name="outRscript">
+                <assert_contents>
+                    <has_text_matching expression="save.image" />
+                </assert_contents>
+            </output>
         </test>
         <!-- Ensure two contrasts works -->
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="2">
             <param name="non_commercial_use" value="True"/>
             <param name="format" value="matrix"/>
             <param name="counts" value="il13.counts"/>
@@ -322,13 +336,22 @@
                 <param name="contrast" value="IL13-IL13Ant"/>
             </repeat>
             <output_collection name="outTables" count="3">
-                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13.txt"/>
-                <element name="ranked-h-gene-sets-IL13-IL13Ant" ftype="tabular" file="ranked-h-gene-sets-IL13-IL13Ant.txt"/>
-                <element name="ranked-h-gene-sets-IL13-IL13Ant" ftype="tabular" file="ranked-h-gene-sets-compare.txt"/>
+                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*BroadUrl.*Description.*PubMedID.*NumGenes.*Contributor.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*M5890.*HALLMARK_TNFA_SIGNALING_VIA_NFKB.*181/200.*3.6" />
+                    </assert_contents>
+                </element>
+                <element name="ranked-h-gene-sets-IL13-IL13Ant" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*BroadUrl.*Description.*PubMedID.*NumGenes.*Contributor.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*M5890.*HALLMARK_TNFA_SIGNALING_VIA_NFKB.*181/200.*3.6" />
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
         <!-- Ensure two factors works -->
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="2">
             <param name="non_commercial_use" value="True"/>
             <param name="format" value="matrix"/>
             <param name="counts" value="il13.counts"/>
@@ -339,11 +362,16 @@
                 <param name="contrast" value="IL13Ant-IL13"/>
             </repeat>
             <output_collection name="outTables" count="1">
-                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13_batch.txt"/>
+                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*BroadUrl.*Description.*PubMedID.*NumGenes.*Contributor.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*M5928.*HALLMARK_MYC_TARGETS_V2.*53/58.*6.7" />
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
         <!-- Ensure all gene set methods work -->
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="2">
             <param name="non_commercial_use" value="True"/>
             <param name="format" value="matrix"/>
             <param name="counts" value="il13.counts"/>
@@ -355,11 +383,16 @@
             </repeat>
             <param name="base_methods" value="camera,safe,gage,zscore,gsva,globaltest,ora,ssgsea,padog,plage,fry,roast"/>
             <output_collection name="outTables" count="1">
-                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13_batch_all.txt"/>
+                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*BroadUrl.*Description.*PubMedID.*NumGenes.*Contributor.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*M5928.*HALLMARK_MYC_TARGETS_V2.*53/58.*2.6" />
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
         <!-- Ensure KEGG updated works -->
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="2">
             <param name="non_commercial_use" value="True"/>
             <param name="format" value="matrix"/>
             <param name="counts" value="il13.counts"/>
@@ -372,8 +405,51 @@
             <param name="keggdb_gsets" value="keggmet"/>
             <param name="kegg_updated" value="True"/>
             <output_collection name="outTables" count="2">
-                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-h-gene-sets-IL13Ant-IL13_batch.txt"/>
-                <element name="ranked-kegg-gene-sets-IL13Ant-IL13" ftype="tabular" file="ranked-kegg-gene-sets-IL13Ant-IL13.txt"/>
+                <element name="ranked-kegg-gene-sets-IL13Ant-IL13" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*NumGenes.*Type.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*hsa00290.*Valine, leucine and isoleucine biosynthesis.*4/4.*Metabolism.*5.1" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Ensure individual counts files work -->
+        <test expect_num_outputs="2">
+            <param name="non_commercial_use" value="True"/>
+            <param name="format" value="files" />
+            <repeat name="rep_factor">
+                <param name="factorName" value="Treatment"/>
+                <repeat name="rep_group">
+                    <param name="groupName" value="IL13"/>
+                    <param name="countsFile" value="IL13-1.counts,IL13-2.counts,IL13-3.counts"/>
+                </repeat>
+                <repeat name="rep_group">
+                    <param name="groupName" value="IL13Ant"/>
+                    <param name="countsFile" value="IL13Ant-1.counts,IL13Ant-2.counts"/>
+                </repeat>
+            </repeat>
+            <repeat name="rep_factor">
+                <param name="factorName" value="Batch"/>
+                <repeat name="rep_group">
+                    <param name="groupName" value="b1"/>
+                    <param name="countsFile" value="IL13-1.counts,IL13Ant-1.counts"/>
+                </repeat>
+                <repeat name="rep_group">
+                    <param name="groupName" value="b2"/>
+                    <param name="countsFile" value="IL13-2.counts,IL13-3.counts,IL13Ant-2.counts"/>
+                </repeat>
+            </repeat>
+            <param name="genes" value="il13.genes"/>
+            <repeat name="rep_contrast">
+                <param name="contrast" value="IL13Ant-IL13"/>
+            </repeat>
+            <output_collection name="outTables" count="1">
+                <element name="ranked-h-gene-sets-IL13Ant-IL13" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Rank.*ID.*GeneSet.*BroadUrl.*Description.*PubMedID.*NumGenes.*Contributor.*p.value.*p.adj.*vote.rank.*avg.rank.*med.rank.*min.pvalue.*min.rank.*avg.logfc.*avg.logfc.dir.*direction.*significance.*camera.*globaltest.*ora" />
+                        <has_text_matching expression="1.*M5928.*HALLMARK_MYC_TARGETS_V2.*53/58.*6.7" />
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
     </tests>
@@ -409,11 +485,23 @@
 
 **Inputs**
 
-**Counts Matrix**
+**Counts Data**
+
+This tool requires a counts matrix (counts table) containing the raw RNA-seq read counts. The counts data can be input either as separate counts files (one sample per file) or as a single counts matrix (one sample per column). The rows correspond to genes, and the columns correspond to the counts for the samples. Values must be tab-separated, with the first row containing the sample/column labels. The first column must contain Entrez Gene IDs that are unique (not repeated) within the counts file. Entrez IDs can be obtained from the **annotateMyIDs** Galaxy tool. Genes with low counts should be removed before running this tool; for example, use the filtered counts matrix that can be output from the **limma** tool.
+
+Example - **Separate Count Files**:
 
-This tool requires a counts matrix (counts table) containing the raw RNA-seq read counts. The first column must contain Entrez Gene IDs that are unique (not repeated) within the counts file. Entrez IDs can be obtained from the **annotateMyIDs** Galaxy tool.
+    =============== ==========
+    EntrezID        **WT1**
+    =============== ==========
+    1               71
+    1000            3
+    10000           2310
+    100009605       3
+    100009613       9
+    =============== ==========
 
-Example:
+Example - **Single Count Matrix**:
 
     =============== ========== ========== ========== ========= ========= =========
     EntrezID        **WT1**    **WT2**    **WT3**    **Mut1**  **Mut2**  **Mut3**
@@ -427,7 +515,7 @@
 
 **Factor Information**
 
-Enter factor names and groups in the tool form, or provide a tab-separated file that has the samples in the same order as listed in the columns of the counts matrix. The second column should contain the primary factor levels (e.g. WT, Mut) with optional additional columns for any secondary factors e.g Batch.
+Enter factor names and groups in the tool form, or provide a tab-separated file that has the names of the samples in the first column and one header row. The sample names must be the same as the names in the columns of the counts matrix. The second column should contain the primary factor levels (e.g. WT, Mut), with optional additional columns for any secondary factors (e.g. Batch).
 
 Example:
 
@@ -444,7 +532,7 @@
 
 *Factor Name:* The name of the experimental factor being investigated e.g. Genotype, Treatment. One factor must be entered and spaces must not be used. Optionally, additional factors can be included, these are variables that might influence your experiment e.g. Batch, Gender, Subject. If additional factors are entered, edgeR will fit an additive linear model.
 
-*Groups:* The names of the groups for the factor. These must be entered in the same order as the samples (to which the groups correspond) are listed in the columns of the counts matrix. Spaces must not be used and if entered into the tool form above, the values should be separated by commas.
+*Groups:* The names of the groups for the factor. Spaces must not be used and if entered into the tool form above, the values should be separated by commas.
 
 **Symbols Mapping file**