diff orthofinder_only_groups.xml @ 2:649b98adce77 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/orthofinder commit 297228981612581ddd4588e042141a5b12fc7840
author iuc
date Tue, 06 Mar 2018 11:10:03 -0500
parents 918d141a166b
children 1aed170afb2b
line wrap: on
line diff
--- a/orthofinder_only_groups.xml	Sun Dec 03 04:30:59 2017 -0500
+++ b/orthofinder_only_groups.xml	Tue Mar 06 11:10:03 2018 -0500
@@ -1,26 +1,26 @@
-<tool name="OrthoFinder OnlyGroups" id="orthofinder_onlygroups" version="1.1.4">
+<tool name="OrthoFinder OnlyGroups" id="orthofinder_onlygroups" version="2.1.2">
     <description>finds orthogroups in a set of proteomes</description>
     <requirements>
-        <requirement type="package" version="1.1.4">orthofinder</requirement>
+        <requirement type="package" version="2.1.2">orthofinder</requirement>
     </requirements>
     <command>
     <![CDATA[
         ## prepare inputs
-        #if $init.start=="fasta":
+        #if $init.start == "fasta":
             #set $infiles = ""
             #for $input in $init.input_fasta
                 ln -s '$input' '${input.element_identifier}.fasta' &&
                 #set $infiles = $infiles + str($input.element_identifier) + ","
             #end for
             #set $infiles = $infiles[:-1]
-        #elif $init.start=="blast":
+        #elif $init.start == "blast":
             #set $infilesbl = ""
             #for $input in $init.input_blast_out
                 ln -s '$input' '$input.element_identifier' &&
                 #set $infilesbl = $infilesbl + str($input.element_identifier) + ","
             #end for
             #set $infilesbl = $infilesbl[:-1]
-            
+
             #set $infilesfa = ""
             #for $input in $init.input_blast_fa
                 ln -s '$input' '$input.element_identifier' &&
@@ -34,28 +34,30 @@
 
         ## start Orthofinder
         orthofinder
-        #if $init.start=="fasta":
+        #if $init.start == "fasta":
             -f .
-        #elif $init.start=="blast":
+            -S $init.search.search_program
+        #elif $init.start == "blast":
             -b .
         #end if
 
         -I $I -og -t \${GALAXY_SLOTS:-1} -a \${GALAXY_SLOTS:-1} &&
 
-        #if $init.start=="fasta":
+        #if $init.start == "fasta":
             mv Results_* results
-            #if $init.keepblastout=="yes":
-                && mkdir -p results/WorkingDirectory/blast results/WorkingDirectory/fa &&
-                mv results/WorkingDirectory/Blast* results/WorkingDirectory/blast/ &&
-                mv results/WorkingDirectory/*.fa results/WorkingDirectory/fa/
+            #if $init.search.search_program == "blast":
+                #if $init.search.keepblastout:
+                    && mkdir -p results/WorkingDirectory/blast results/WorkingDirectory/fa &&
+                    mv results/WorkingDirectory/Blast* results/WorkingDirectory/blast/ &&
+                    mv results/WorkingDirectory/*.fa results/WorkingDirectory/fa/
+                #end if
             #end if
-        #elif $init.start=="blast":
+        #elif $init.start == "blast":
             mkdir results  &&
             mv *.csv results/ &&
             mv Orthogroups.txt results/
         #end if
-       
-    ]]> 
+    ]]>
     </command>
     <inputs>
         <!-- Control where Orthofinder starts -->
@@ -67,7 +69,18 @@
 
             <when value="fasta">
                 <param name="input_fasta" type="data" format="fasta" multiple="true" label="Select input fasta proteomes" help="One fasta file per species; species and sequences names in the results will remain the same than in the input files."/>
-                <param name="keepblastout" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Do you want to get the blast results ?" help="Used to re-run OrthoFinder_OnlyGroups from pre-computed blast results"/>   
+                <conditional name="search">
+                    <param name="search_program" type="select" label="Sequence search program" help="Choose between blast, blast_gz, diamond">
+                        <option value="blast" selected="true">blast</option>
+                        <option value="blast_gz">blast_gz</option>
+                        <option value="diamond">diamond</option>
+                    </param>
+                    <when value="blast">
+                        <param name="keepblastout" type="boolean" checked="true" label="Do you want to get the blast results?" help="Used to re-run OrthoFinder_OnlyGroups from pre-computed blast results"/>
+                    </when>
+                    <when value="diamond"></when>
+                    <when value="blast_gz"></when>
+                </conditional>
             </when>
 
             <when value="blast">
@@ -77,7 +90,6 @@
                 <param name="seqIDs" type="data" format="txt" label="Select the SequencesIds file" help="SequencesIDs.txt file from the blast output files of a previous OrthoFinder_OnlyGroups run."/>
             </when>
         </conditional>
-        
         <param argument="-I" type="float" value="1.5" label="Inflation parameter" help="Modify inflation parameter for MCL. Not recommended." />
     </inputs>
     <outputs>
@@ -88,21 +100,25 @@
         <data format="csv" name="unassigned_genes" label="Orthogroups_UnassignedGenes.csv" from_work_dir="results/Orthogroups_UnassignedGenes.csv" />
         <data format="csv" name="stat_overall" label="Statistics_Overall.csv" from_work_dir="results/Statistics_Overall.csv" />
         <data format="csv" name="stat_specs" label="Statistics_PerSpecies.csv" from_work_dir="results/Statistics_PerSpecies.csv" />
-        
+
         <!-- working directory : blast outputs-->
         <collection name="wdblast" type="list" label="Blast_outputs">
-            <discover_datasets pattern="__name_and_ext__" directory="results/WorkingDirectory/blast/" />
-            <filter>init['start']=="fasta" and init['keepblastout']</filter>
+            <discover_datasets pattern="__name_and_ext__" directory="results/WorkingDirectory/blast" />
+            <filter>init['start'] == "fasta" and init['search']['search_program'] == "blast" and init['search']['keepblastout']</filter>
         </collection>
         <collection name="wdfasta" type="list" label="Fasta_from_blast" >
-            <discover_datasets pattern="__name_and_ext__" directory="results/WorkingDirectory/fa/" />
-            <filter>init['start']=="fasta" and init['keepblastout']</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fa" directory="results/WorkingDirectory/fa" format="fasta" />
+            <filter>init['start'] == "fasta" and init['search']['search_program'] == "blast" and init['search']['keepblastout']</filter>
+        </collection>
+        <collection name="genetrees" type="list" label="Gene trees">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.txt" directory="results/WorkingDirectory/trees" format="nhx" />
+            <filter>dogenetrees</filter>
         </collection>
         <data format="txt" name="SpeciesIDs" label="SpeciesIDs.txt" from_work_dir="results/WorkingDirectory/SpeciesIDs.txt" >
-            <filter>init['start']=="fasta" and init['keepblastout']</filter>
+            <filter>init['start'] == "fasta" and init['search']['search_program'] == "blast" and init['search']['keepblastout']</filter>
         </data>
         <data format="txt" name="SequenceIDs" label="SequencesIDs.txt" from_work_dir="results/WorkingDirectory/SequenceIDs.txt" >
-            <filter>init['start']=="fasta" and init['keepblastout']</filter>
+            <filter>init['start'] == "fasta" and init['search']['search_program'] == "blast" and init['search']['keepblastout']</filter>
         </data>
     </outputs>
     <tests>
@@ -111,7 +127,10 @@
             <conditional name="init">
                 <param name="start" value="fasta" />
                 <param name="input_fasta" ftype="fasta" value="inputs/no_fa_ext/AcAcaud_trinity,inputs/no_fa_ext/AmAmphi_trinity,inputs/no_fa_ext/ApApomp_trinity,inputs/no_fa_ext/AsAsp1_trinity" />
-                <param name="keepblastout" value="no" />
+                <conditional name="search">
+                    <param name="search_program" value="blast"/>
+                    <param name="keepblastout" value="false" />
+                </conditional>
             </conditional>
             <param name="inflation" value="1.5" />
             <output name="specs_overlap">
@@ -164,7 +183,10 @@
             <conditional name="init">
                 <param name="start" value="fasta" />
                 <param name="input_fasta" ftype="fasta" value="inputs/proteomes/Mycoplasma_agalactiae.faa,inputs/proteomes/Mycoplasma_gallisepticum.faa,inputs/proteomes/Mycoplasma_genitalium.faa,inputs/proteomes/Mycoplasma_hyopneumoniae.faa" />
-                <param name="keepblastout" value="no" />
+                <conditional name="search">
+                    <param name="search_program" value="blast"/>
+                    <param name="keepblastout" value="false" />
+                </conditional>
             </conditional>
             <param name="inflation" value="1.5" />
             <output name="specs_overlap">
@@ -212,12 +234,15 @@
                 </assert_contents>
             </output>
         </test>
-        
+
         <test>
             <conditional name="init">
                 <param name="start" value="fasta" />
                 <param name="input_fasta" ftype="fasta" value="inputs/proteomes/Mycoplasma_agalactiae.faa,inputs/proteomes/Mycoplasma_gallisepticum.faa,inputs/proteomes/Mycoplasma_genitalium.faa,inputs/proteomes/Mycoplasma_hyopneumoniae.faa" />
-                <param name="keepblastout" value="yes" />
+                <conditional name="search">
+                    <param name="search_program" value="blast"/>
+                    <param name="keepblastout" value="true" />
+                </conditional>
             </conditional>
             <param name="inflation" value="1.5" />
             <output name="specs_overlap">
@@ -268,7 +293,63 @@
             <output_collection name="wdfasta" type="list" count="4"/>
             <output_collection name="wdblast" type="list" count="16"/>
         </test>
-        
+
+        <!-- test -S diamond -->
+        <test>
+            <conditional name="init">
+                <param name="start" value="fasta" />
+                <param name="input_fasta" ftype="fasta" value="inputs/proteomes/Mycoplasma_agalactiae.faa,inputs/proteomes/Mycoplasma_gallisepticum.faa,inputs/proteomes/Mycoplasma_genitalium.faa,inputs/proteomes/Mycoplasma_hyopneumoniae.faa" />
+                <conditional name="search">
+                    <param name="search_program" value="diamond"/>
+                </conditional>
+            </conditional>
+            <param name="inflation" value="1.5" />
+            <output name="specs_overlap">
+                <assert_contents>
+                    <has_text text="Mycoplasma_agalactiae"/>
+                    <has_text text="Mycoplasma_gallisepticum"/>
+                    <has_text text="Mycoplasma_genitalium"/>
+                    <has_text text="Mycoplasma_hyopneumoniae"/>
+                    <has_n_columns n="5"/>
+                </assert_contents>
+            </output>
+            <output name="unassigned_genes">
+                <assert_contents>
+                    <has_text text="Mycoplasma_agalactiae"/>
+                    <has_text text="Mycoplasma_gallisepticum"/>
+                    <has_text text="Mycoplasma_genitalium"/>
+                    <has_text text="Mycoplasma_hyopneumoniae"/>
+                    <has_n_columns n="5"/>
+                </assert_contents>
+            </output>
+            <output name="stat_overall">
+                <assert_contents>
+                    <has_text text="Number of genes in orthogroups"/>
+                    <has_text text="Number of unassigned genes"/>
+                    <has_text text="Percentage of orthogroups"/>
+                    <has_text text="Number of orthogroups"/>
+                    <has_text text="Number of genes"/>
+                    <has_text text="G50 (assigned genes)"/>
+                    <has_text text="G50 (all genes)"/>
+                    <has_text text="O50 (assigned genes)"/>
+                    <has_text text="O50 (all genes)"/>
+                </assert_contents>
+            </output>
+            <output name="stat_specs">
+                <assert_contents>
+                    <has_text text="Mycoplasma_agalactiae"/>
+                    <has_text text="Mycoplasma_gallisepticum"/>
+                    <has_text text="Mycoplasma_genitalium"/>
+                    <has_text text="Mycoplasma_hyopneumoniae"/>
+                    <has_text text="Number of genes per-species in orthogroup"/>
+                    <has_text text="Percentage of orthogroups"/>
+                    <has_text text="Number of orthogroups"/>
+                    <has_text text="Number of genes"/>
+                    <has_n_columns n="5"/>
+                </assert_contents>
+            </output>
+        </test>
+
         <!-- test orthofinder -b -og -->
         <test>
             <conditional name="init">
@@ -339,6 +420,13 @@
         - The SpeciesIDs.txt file
         - The SequencesIDs.txt file
 
+----------
+Parameters
+----------
+    - Sequence search program : You can choose either blast, blast_gz, or diamond (diamond is faster)
+    - Get the blast results : Check "Yes" if, while using blast as the sequence search program, you want to retrieve the blast output files
+    - Inflation : the inflation parameter; modifying this parameter is not recommended.
+
     </help>
     <citations>
         <citation type="doi">10.1186/s13059-015-0721-2</citation>