diff busco.xml @ 23:4e70d88adf2f draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/busco/ commit 68696449a909c43d0e44bc9cfc8f8945e8a9dfce
author iuc
date Fri, 04 Apr 2025 11:18:42 +0000
parents e5c372c91e46
children
line wrap: on
line diff
--- a/busco.xml	Tue Oct 29 14:13:47 2024 +0000
+++ b/busco.xml	Fri Apr 04 11:18:42 2025 +0000
@@ -8,11 +8,37 @@
     </xrefs>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">busco</requirement>
+        <!-- When changing the augustus version pin below, also update the augustus_species macro (expanded in the "Augustus species model" select) -->
+        <requirement type="package" version="3.5.0">augustus</requirement>
         <requirement type="package" version="1.34">tar</requirement>
         <requirement type="package" version="1">fonts-conda-ecosystem</requirement>
+        <!-- TODO: this pin might be removed in the future. For some reason conda installs an outdated sepp 4.4.0 together with the latest dendropy 5.x, and the two are incompatible -->
+        <requirement type="package" version="4.5.5">sepp</requirement>
     </requirements>
     <version_command>busco --version</version_command>
     <command><![CDATA[
+## Tool tests cannot run with --offline (otherwise we would need to store a lot of data at IUC),
+## so we create a mock directory in the working dir that the tool can write to.
+## 
+## a more thorough test can be executed as follows
+## - set the `test` parameters to `""`
+## - download complete reference DB (~200G, final 105G) to tools/busco/test-data/test-db/busco_downloads
+##   ```
+##   busco --download_path tools/busco/test-data/test-db/busco_downloads/ --download all
+##   find tools/busco/test-data/test-db/busco_downloads/lineages/ -mindepth 1 -maxdepth 1 ! -name '*_odb10*' -exec rm -rf {} \;
+##   find tools/busco/test-data/test-db/busco_downloads/placement_files -mindepth 1 -maxdepth 1 ! -name '*_odb10*' -delete
+##   find tools/busco/test-data/test-db/busco_downloads/lineages/ -name "*.faa.gz" -exec gunzip {} \;
+##   ```
+## - run the tests containerized (note: test-data is mounted read-only in containerized tests)
+## 
+## alternatively, a somewhat weaker test:
+## - set the `test` parameters to `""` and comment out --offline
+## - run the tests NOT containerized (this will download the data needed for the tests)
+## - uncomment --offline and run the tests containerized
+#if $test == 'true'
+    mkdir mock_db_path/ &&
+    cp -r '$cached_db.fields.path'/* mock_db_path/ &&
+#end if
 
 #if $busco_mode.mode == 'geno' and $busco_mode.use_augustus.use_augustus_selector == 'augustus':
 
@@ -25,21 +51,27 @@
     mkdir -p 'augustus_dir/species/' &&
     tar -C 'augustus_dir/species/' -xzf '${busco_mode.use_augustus.aug_prediction.augustus_model}' &&
 #end if
+#end if
 
+#if $input.is_of_type("fasta.gz")
+    gunzip -c '$input' > input.fa &&
+#else
+    ln -s '$input' input.fa &&
 #end if
 
 busco
---in '${input}'
+--in 'input.fa'
 --mode '${busco_mode.mode}'
 --out busco_galaxy
 --cpu \${GALAXY_SLOTS:-4}
 --evalue ${adv.evalue}
 --limit ${adv.limit}
 --contig_break ${adv.contig_break}
-
-#if $lineage_conditional.selector == 'cached':
-    --offline
-    --download_path $lineage_conditional.cached_db.fields.path
+#if $test == 'true'
+--download_path mock_db_path
+#else
+--offline
+--download_path '$cached_db.fields.path'
 #end if
 
 #if $lineage.lineage_mode == "auto_detect":
@@ -65,46 +97,36 @@
 #end if
     
 #if $outputs and 'image' in $outputs:
-    &&
-    mkdir BUSCO_summaries
-    &&
-    ls -l busco_galaxy/run_*/ &&
-    cp busco_galaxy/short_summary.*.txt BUSCO_summaries/
-    &&
-    generate_plot.py -wd BUSCO_summaries -rt specific
+    && mkdir BUSCO_summaries
+    && cp busco_galaxy/short_summary.*.txt BUSCO_summaries/
+    && generate_plot.py -wd BUSCO_summaries -rt specific
 #end if
 
 #if $outputs and 'gff' in $outputs:
-    &&
-    echo "\##gff-version 3" > busco_output.gff
-    &&
+    && echo "\##gff-version 3" > busco_output.gff
     ## gff files can be absent
-    cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.gff >> busco_output.gff 2> /dev/null || true
+    && (cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.gff >> busco_output.gff 2> /dev/null || true)
 #end if
-
-
+#if $outputs and 'faa' in $outputs:
+    && (cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.faa >> busco_output.faa 2> /dev/null || true)
+#end if
+#if $outputs and 'fna' in $outputs:
+    && (cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.fna >> busco_output.fna 2> /dev/null || true)
+#end if
 ]]></command>
     <inputs>
-        <param type="data" name="input" format="fasta" label="Sequences to analyse" help="Can be an assembled genome or transcriptome (DNA), or protein sequences from an annotated gene set."/>
-        <conditional name="lineage_conditional">
-            <param name="selector" type="select" label="Lineage data source">
-                <option value="download">Download lineage data</option>
-                <option value="cached">Use cached lineage data</option>
-            </param>
-            <when value="cached">
-                <param name="cached_db" label="Cached database with lineage" type="select">
-                    <options from_data_table="busco_database">
-                        <validator message="No BUSCO database is available" type="no_options"/>
-                    </options>
-                </param>
-            </when>
-            <when value="download"/>
-        </conditional>
+        <param name="test" type="hidden"/>
+        <param type="data" name="input" format="fasta,fasta.gz" label="Sequences to analyse" help="Can be an assembled genome or transcriptome (DNA), or protein sequences from an annotated gene set."/>
+        <param name="cached_db" label="Cached database with lineage" type="select">
+            <options from_data_table="busco_database">
+                <validator message="No BUSCO database is available - please contact your Galaxy admin." type="no_options"/>
+            </options>
+        </param>
         <conditional name="busco_mode">
             <param argument="--mode" type="select" label="Mode">
                 <option value="geno">Genome assemblies (DNA)</option>
                 <option value="tran">Transcriptome assemblies (DNA)</option>
-                <option value="prot">annotated gene sets (protein)</option>
+                <option value="prot">Annotated gene sets (protein)</option>
             </param>
             <when value="geno">
                 <conditional name="use_augustus">
@@ -127,7 +149,7 @@
                                 <param name="augustus_model" type="data" format="augustus" label="Augustus model"/>
                             </when>
                             <when value="builtin">
-                                <param name="augustus_species" type="select" label="Augustus species model">
+                                <param name="augustus_species" type="select" label="Augustus species model" help="If model name is different than species name it is shown in parentheses." >
                                     <expand macro="augustus_species"/>
                                 </param>
                             </when>
@@ -153,7 +175,9 @@
             </when>
             <when value="select_lineage">
                 <param argument="--lineage_dataset" type="select" label="Lineage">
-                    <expand macro="lineages"/>
+                    <options from_data_table="busco_database_options">
+                        <filter type="param_value" column="2" ref="cached_db"/>
+                    </options>
                 </param>
             </when>
         </conditional>
@@ -163,6 +187,8 @@
             <option value="missing">list with missing IDs</option>
             <option value="image">summary image</option>
             <option value="gff">gff</option>
+            <option value="faa">Protein sequences</option>
+            <option value="fna">Nucleotide sequences</option>
         </param>
 
         <section name="adv" title="Advanced Options" expanded="False">
@@ -186,11 +212,18 @@
         <data name='busco_gff' format='gff3' label="${tool.name} on ${on_string}: GFF" from_work_dir="busco_output.gff">
             <filter>outputs and 'gff' in outputs</filter>
         </data>              
+        <data name='busco_faa' format='fasta' label="${tool.name} on ${on_string}: Protein sequences" from_work_dir="busco_output.faa">
+            <filter>outputs and 'faa' in outputs</filter>
+        </data>              
+        <data name='busco_fna' format='fasta' label="${tool.name} on ${on_string}: Nucleotide sequences" from_work_dir="busco_output.fna">
+            <filter>outputs and 'fna' in outputs</filter>
+        </data>              
     </outputs>
     
     <tests>
-        <test expect_num_outputs="4">
-            <param name="input" value="genome.fa"/>
+        <!-- <test expect_num_outputs="6">
+            <param name="test" value="true"/>
+	        <param name="input" value="genome.fa.gz" ftype="fasta.gz"/>
             <conditional name="lineage">
                 <param name="lineage_mode" value="select_lineage"/>
                 <param name="lineage_dataset" value="arthropoda_odb10"/>
@@ -201,7 +234,7 @@
                     <param name="use_augustus_selector" value="augustus"/>
                 </conditional>
             </conditional>
-            <param name="outputs" value="short_summary,missing,gff"/>
+            <param name="outputs" value="short_summary,missing,gff,faa,fna"/>
             <output name="busco_sum">
                 <assert_contents>
                     <has_text text="# BUSCO version is: @TOOL_VERSION@"/>
@@ -215,9 +248,23 @@
                     <has_text text="##gff-version 3"/>
                 </assert_contents>
             </output>
+            <output name="busco_fna">
+                <assert_contents>
+                    <has_text text=">"/>
+                </assert_contents>
+            </output>
+            <output name="busco_faa">
+                <assert_contents>
+                    <has_text text=">"/>
+                </assert_contents>
+            </output>
             <output name="busco_missing" file="genome_results/missing_buscos_list" compare="diff" lines_diff="10"/>
+            <assert_stdout>
+                <has_text text="BUSCO analysis done"/>
+            </assert_stdout>
         </test>
         <test expect_num_outputs="5">
+            <param name="test" value="true"/>
             <param name="input" value="proteome.fa"/>
             <conditional name="lineage">
                 <param name="lineage_mode" value="select_lineage"/>
@@ -242,8 +289,12 @@
                     <has_text text="##gff-version 3"/>
                 </assert_contents>
             </output>
+            <assert_stdout>
+                <has_text text="BUSCO analysis done"/>
+            </assert_stdout>
         </test>
         <test expect_num_outputs="5">
+            <param name="test" value="true"/>
             <param name="input" value="transcriptome.fa"/>
             <conditional name="lineage">
                 <param name="lineage_mode" value="select_lineage"/>
@@ -262,8 +313,12 @@
             <output name="busco_missing" file="transcriptome_results/missing_buscos_list" compare="diff" lines_diff="6"/>
             <output name="summary_image" file="transcriptome_results/summary.png" compare="sim_size"/>
             <output name="busco_gff" file="transcriptome_results/out.gff" compare="diff"/>
+            <assert_stdout>
+                <has_text text="BUSCO analysis done"/>
+            </assert_stdout>
         </test>
         <test expect_num_outputs="3">
+            <param name="test" value="true"/>
             <param name="input" value="genome.fa"/>
             <conditional name="lineage">
                 <param name="lineage_mode" value="select_lineage"/>
@@ -292,8 +347,12 @@
                     <has_text text="##gff-version 3"/>
                 </assert_contents>
             </output>
+            <assert_stdout>
+                <has_text text="BUSCO analysis done"/>
+            </assert_stdout>
         </test>
         <test expect_num_outputs="3">
+            <param name="test" value="true"/>
             <param name="input" value="genome.fa"/>
             <conditional name="lineage">
                 <param name="lineage_mode" value="select_lineage"/>
@@ -318,8 +377,12 @@
             </output>
             <output name="busco_table" file="genome_results/full_table" compare="diff" lines_diff="10"/>
             <output name="busco_missing" file="genome_results/missing_buscos_list" compare="diff" lines_diff="10"/>
+            <assert_stdout>
+                <has_text text="BUSCO analysis done"/>
+            </assert_stdout>
         </test>
         <test expect_num_outputs="5">
+            <param name="test" value="true"/>
             <param name="input" value="genome.fa"/>
             <conditional name="lineage">
                 <param name="lineage_mode" value="select_lineage"/>
@@ -351,8 +414,12 @@
             </output>
             <output name="summary_image" file="genome_results_metaeuk/summary.png" compare="sim_size"/>
             <output name="busco_gff" file="genome_results_metaeuk/out.gff3" compare="diff" lines_diff="6"/>
-        </test>
+            <assert_stdout>
+                <has_text text="BUSCO analysis done"/>
+            </assert_stdout>
+        </test> -->
         <test expect_num_outputs="5">
+            <param name="test" value="true"/>
             <param name="input" value="genome.fa"/>
             <conditional name="lineage">
                 <param name="lineage_mode" value="auto_detect"/>
@@ -384,8 +451,12 @@
             </output>
             <output name="summary_image" file="genome_results_metaeuk_auto/summary.png" compare="sim_size"/>
             <output name="busco_gff" file="genome_results_metaeuk_auto/out.gff" compare="diff"/>
+            <assert_stdout>
+                <has_text text="BUSCO analysis done"/>
+            </assert_stdout>
         </test>
-        <test expect_num_outputs="3">
+        <!-- <test expect_num_outputs="3">
+            <param name="test" value="true"/>
             <param name="input" value="genome.fa"/>
             <conditional name="lineage">
                 <param name="lineage_mode" value="select_lineage"/>
@@ -404,7 +475,10 @@
                     <has_text text="9647at6656"/>
                 </assert_contents>
             </output>
-        </test>
+            <assert_stdout>
+                <has_text text="BUSCO analysis done"/>
+            </assert_stdout>
+        </test> -->
     </tests>
     <help><![CDATA[