changeset 31:3aae4f16ac9e draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit e4cbbb48006ac767c69efe53eab2a63306124bb5
author iuc
date Sat, 04 Oct 2025 17:04:38 +0000
parents c7275bd8b4d6
children
files snpEff.xml snpEff_create_db.xml snpEff_download.xml snpEff_macros.xml snpeff_get_chr_names.xml test-data/dbs/ebola_zaire/snpEffectPredictor.bin test-data/snpeffv_genomedb.loc test-data/snpeffv_regulationdb.loc tool_data_table_conf.xml.test
diffstat 9 files changed, 177 insertions(+), 70 deletions(-) [+]
line wrap: on
line diff
--- a/snpEff.xml	Mon Nov 18 22:15:34 2024 +0000
+++ b/snpEff.xml	Sat Oct 04 17:04:38 2025 +0000
@@ -4,7 +4,9 @@
         <import>snpEff_macros.xml</import>
     </macros>
     <requirements>
-        <expand macro="requirement" />
+        <expand macro="requirement">
+            <requirement type="package" version="9.5">coreutils</requirement>
+        </expand>
     </requirements>
     <expand macro="stdio" />
     <expand macro="version_command" />
@@ -47,15 +49,23 @@
         #if $intervals     ### fix this for multiple dataset input
           -interval intervals.bed
         #end if
-        #if $statsFile:
-          -stats '$statsFile'
-        #end if
-        #if $csvStats:
-            -csvStats '$csvFile'
-        #end if
         #if str($chr).strip() != '':
           -chr '$chr'
         #end if
+        #if $generate_stats or $generate_gene_stats or $csvStats:
+          #if $csvStats:
+            $csvStats snpeff_stats.csv
+          #end if
+          #if $generate_stats or ($generate_gene_stats and not $csvStats):
+            ## the base name passed in via the -csvStats or the -s option also determines the name of the genes.txt file
+            ## so in the absence of the first we need the second to have a consistent name of the genes.txt file
+            -s snpeff_stats.html
+          #end if
+        #else:
+          ## when no stats output is requested by the user, we can make things a little more efficient
+          ## by telling snpEff that it doesn't have to write even the default (html and genes.txt) ones.
+          -noStats
+        #end if
           $noLog
         ## Regulation names can include parentheses: H3K4me3-MSC_(VB)_enriched_sites
         ## Enclose them in in single and double quotes, as the conda snpEff bash script will remove outer quotes
@@ -88,17 +98,15 @@
           '$snpDb.genome_version'
         #end if
         '$input' > '$snpeff_output'
-        #if $statsFile:
-            &&
-            #import os
-            #if $csvStats:
-                #set $genes_file = str($csvFile) + '.genes.txt'
-            #else
-                #set $genes_file = str($statsFile) + '.genes.txt'
-            #end if
-            #set $genes_file_name = os.path.split($genes_file)[-1]
-            mkdir '$statsFile.files_path' &&
-            mv '$genes_file' '#echo os.path.join($statsFile.files_path, $genes_file_name)#'
+        #if $generate_gene_stats:
+          ## remove the first, unnecessary comment line from the output
+          && tail -n+2 snpeff_stats.genes.txt > genes.txt
+        #end if
+        #if $generate_stats:
+          ## independently of whether the user asked for the gene.txt file,
+          ## we need to add it to files_path because the stats html report links to it.
+          && mkdir '$statsFile.files_path' &&
+          mv snpeff_stats.genes.txt $statsFile.files_path
         #end if
     ]]></command>
     <inputs>
@@ -114,7 +122,8 @@
             <option value="bedAnn">BED annotations</option>
         </param>
         <param argument="-csvStats" type="boolean" truevalue="-csvStats" falsevalue="" checked="false" label="Create CSV report?" help="Useful for downstream analyses and report generation" />
-        <param argument="-noStats" name="generate_stats" type="boolean" truevalue="" falsevalue="-noStats" checked="true" label="Produce Summary Stats?" help="Generates an HTML summary of results"/>
+        <param name="generate_stats" type="boolean" checked="true" label="Produce Summary Stats?" help="Generates an HTML summary of results"/>
+        <param name="generate_gene_stats" type="boolean" label="Produce Gene Statistics output?" help="Generates a table of effects per gene as an extra output"/>
         <conditional name="snpDb">
             <param name="genomeSrc" type="select" label="Genome source">
                 <!-- These options are referenced in the help section of SnpEff download tool. If you change them, change help of SnpEff download as well -->
@@ -127,14 +136,15 @@
                 <param name="genomeVersion" type="select" label="Genome">
                     <!--GENOME    DESCRIPTION-->
                     <options from_data_table="snpeffv_genomedb">
-                            <filter type="static_value" name="snpeff_version" value="@SNPEFF_VERSION@" column="1"/>
-                            <filter type="unique_value" column="2" />
+                        <filter type="regexp" column="1" value="@COMPATIBLE_DB_VERSIONS_REGEX@" />
+                        <filter type="unique_value" column="2" />
                     </options>
                 </param>
                 <section name="reg_section" expanded="false" title="Regulation options">
                     <param name="regulation" type="select" display="checkboxes" multiple="true" label="Non-coding and regulatory annotation" help="These are available for only a few genomes">
                         <options from_data_table="snpeffv_regulationdb">
-                            <filter type="param_value" ref="genomeVersion" key="genome" column="2" />
+                            <filter type="regexp" column="1" value="@COMPATIBLE_DB_VERSIONS_REGEX@" />
+                            <filter type="param_value" ref="genomeVersion" column="2" />
                             <filter type="unique_value" column="3" />
                         </options>
                     </param>
@@ -142,7 +152,7 @@
             </when>
             <when value="history">
                 <param name="snpeff_db" type="data" format="snpeffdb" label="@SNPEFF_VERSION@ Genome Data">
-                    <validator type="expression" message="This version of SnpEff will only work with @SNPEFF_VERSION@ genome databases">value is not None and value.metadata.snpeff_version == "@SNPEFF_VERSION@"</validator>
+                    <validator type="expression" message="This version of SnpEff will only work with @COMPATIBLE_DB_VERSIONS_STRING@ genome databases">value.metadata.snpeff_version in @COMPATIBLE_DB_VERSIONS@</validator>
                 </param>
                 <section name="reg_section" expanded="false" title="Regulation options">
                     <!-- From metadata -->
@@ -160,7 +170,7 @@
             </when>
             <when value="custom">
                 <param name="snpeff_db" type="data" format="snpeffdb" label="@SNPEFF_VERSION@ Genome Data">
-                    <validator type="expression" message="This version of SnpEff will only work with @SNPEFF_VERSION@ genome databases">value is not None and value.metadata.snpeff_version == "@SNPEFF_VERSION@"</validator>
+                    <validator type="expression" message="This version of SnpEff will only work with @COMPATIBLE_DB_VERSIONS_STRING@ genome databases">value.metadata.snpeff_version in @COMPATIBLE_DB_VERSIONS@</validator>
                 </param>
                 <param name="codon_table" type="select" label="Select genetic code for this sequence" help="If this sequence uses non-standard genetic code, select one from these options">
                     <option selected="true" value="Standard">Standard</option>
@@ -188,7 +198,7 @@
                     <option value="Trematode_Mitochondrial">Trematode_Mitochondrial</option>
                     <option value="Scenedesmus_obliquus_Mitochondrial">Scenedesmus_obliquus_Mitochondrial</option>
                     <option value="Thraustochytrium_Mitochondrial">Thraustochytrium_Mitochondrial</option>
-            </param>
+                </param>
             </when>
         </conditional>
         <param name="udLength" argument="-ud" type="select" label="Upstream / Downstream length">
@@ -328,22 +338,43 @@
                 <when input="outputConditional.outputFormat" value="bedAnn" format="bed" />
             </change_format>
         </data>
-        <data name="statsFile" format="html" label="${tool.name} on ${on_string} - HTML stats">
+        <data name="statsFile" format="html" label="${tool.name} on ${on_string} - HTML stats" from_work_dir="snpeff_stats.html">
             <filter>generate_stats</filter>
         </data>
-        <data name="csvFile" format="csv" label="${tool.name} on ${on_string} - CSV stats">
+        <data name="genes_file" format="tabular" label="${tool.name} on ${on_string} - Gene stats" from_work_dir="genes.txt">
+            <filter>generate_gene_stats</filter>
+        </data>
+        <data name="csvFile" format="txt" label="${tool.name} on ${on_string} - CSV stats" from_work_dir="snpeff_stats.csv">
             <filter>csvStats</filter>
         </data>
     </outputs>
     <tests>
+        <test expect_num_outputs="1">
+            <param name="input" ftype="vcf" value="input.vcf"/>
+            <param name="inputFormat" value="vcf"/>
+            <param name="outputFormat" value="vcf"/>
+            <conditional name="snpDb">
+                <param name="genomeSrc" value="cached"/>
+                <param name="genomeVersion" value="ebola_zaire"/>
+            </conditional>
+            <param name="udLength" value="0"/>
+            <param name="generate_stats" value="false"/>
+            <output name="snpeff_output">
+                <assert_contents>
+                    <has_text_matching expression="KJ660346.1\t572\t.*missense_variant" />
+                    <has_text_matching expression="KJ660346.1\t1024\t.*synonymous_variant" />
+                </assert_contents>
+            </output>
+        </test>
         <test expect_num_outputs="2">
             <param name="input" ftype="vcf" value="input.vcf"/>
             <param name="inputFormat" value="vcf"/>
             <param name="outputFormat" value="vcf"/>
-            <param name="genomeSrc" value="named"/>
-            <param name="genome_version" value="ebola_zaire"/>
+            <conditional name="snpDb">
+                <param name="genomeSrc" value="cached"/>
+                <param name="genomeVersion" value="ebola_zaire"/>
+            </conditional>
             <param name="udLength" value="0"/>
-            <param name="generate_stats" value="true"/>
             <output name="snpeff_output">
                 <assert_contents>
                     <has_text_matching expression="KJ660346.1\t572\t.*missense_variant" />
@@ -356,14 +387,39 @@
                 </assert_contents>
             </output>
         </test>
+        <test expect_num_outputs="2">
+            <param name="input" ftype="vcf" value="input.vcf"/>
+            <param name="inputFormat" value="vcf"/>
+            <param name="outputFormat" value="vcf"/>
+            <conditional name="snpDb">
+                <param name="genomeSrc" value="cached"/>
+                <param name="genomeVersion" value="ebola_zaire"/>
+            </conditional>
+            <param name="udLength" value="0"/>
+            <param name="generate_stats" value="false"/>
+            <param name="generate_gene_stats" value="true"/>
+            <output name="snpeff_output">
+                <assert_contents>
+                    <has_text_matching expression="KJ660346.1\t572\t.*missense_variant" />
+                    <has_text_matching expression="KJ660346.1\t1024\t.*synonymous_variant" />
+                </assert_contents>
+            </output>
+            <output name="genes_file">
+                <assert_contents>
+                    <has_text text="#GeneName"/>
+                </assert_contents>
+            </output>
+        </test>
         <!-- Test interval option-->
         <test expect_num_outputs="2">
             <param name="input" ftype="vcf" value="input.vcf"/>
             <param name="inputFormat" value="vcf"/>
             <param name="outputFormat" value="vcf"/>
-            <param name="genomeSrc" value="named"/>
-            <param name="interval" value="intervals.bed"/>
-            <param name="genome_version" value="ebola_zaire"/>
+            <conditional name="snpDb">
+                <param name="genomeSrc" value="cached"/>
+                <param name="genomeVersion" value="ebola_zaire"/>
+            </conditional>
+            <param name="intervals" value="intervals.bed"/>
             <param name="udLength" value="0"/>
             <param name="generate_stats" value="false"/>
             <param name="csvStats" value="true"/>
--- a/snpEff_create_db.xml	Mon Nov 18 22:15:34 2024 +0000
+++ b/snpEff_create_db.xml	Sat Oct 04 17:04:38 2025 +0000
@@ -126,8 +126,10 @@
     <tests>
         <test expect_num_outputs="2">
             <param name="genome_version" value="pBR322"/>
-            <param name="input_type_selector" value="gb"/>
-            <param name="input" value="pBR322.gbk" />
+            <conditional name="input_type">
+                <param name="input_type_selector" value="gb"/>
+                <param name="input" value="pBR322.gbk" />
+            </conditional>
             <output name="snpeff_output">
                 <assert_contents>
                     <has_text text="pBR322" />
@@ -137,8 +139,10 @@
         </test>
         <test expect_num_outputs="2">
             <param name="genome_version" value="pBR322"/>
-            <param name="input_type_selector" value="gb"/>
-            <param name="input" value="pBR322.gbk.gz" />
+            <conditional name="input_type">
+                <param name="input_type_selector" value="gb"/>
+                <param name="input" value="pBR322.gbk.gz" />
+            </conditional>
             <output name="snpeff_output">
                 <assert_contents>
                     <has_text text="pBR322" />
@@ -148,10 +152,14 @@
         </test>
         <test expect_num_outputs="1">
             <param name="genome_version" value="pBR322"/>
-            <param name="input_type_selector" value="gff"/>
-            <param name="reference_source_selector" value="history"/>
-            <param name="input_fasta" value="pBR322_test2.fna" />
-            <param name="input" value="pBR322.gff3"/>
+            <conditional name="input_type">
+                <param name="input_type_selector" value="gff"/>
+                <param name="input" value="pBR322.gff3"/>
+                <conditional name="reference_source">
+                    <param name="reference_source_selector" value="history"/>
+                    <param name="input_fasta" value="pBR322_test2.fna" />
+                </conditional>
+            </conditional>
             <output name="snpeff_output">
                 <assert_contents>
                     <has_text text="pBR322" />
@@ -160,10 +168,14 @@
         </test>
         <test expect_num_outputs="1">
             <param name="genome_version" value="pBR322"/>
-            <param name="input_type_selector" value="gff"/>
-            <param name="reference_source_selector" value="history"/>
-            <param name="input_fasta" value="pBR322_test2.fna.gz" />
-            <param name="input" value="pBR322.gff3"/>
+            <conditional name="input_type">
+                <param name="input_type_selector" value="gff"/>
+                <param name="input" value="pBR322.gff3"/>
+                <conditional name="reference_source">
+                    <param name="reference_source_selector" value="history"/>
+                    <param name="input_fasta" value="pBR322_test2.fna.gz" />
+                </conditional>
+            </conditional>
             <output name="snpeff_output">
                 <assert_contents>
                     <has_text text="pBR322" />
@@ -172,10 +184,14 @@
         </test>
         <test expect_num_outputs="1">
             <param name="genome_version" value="Saccharomyces_mito"/>
-            <param name="input_type_selector" value="gtf"/>
-            <param name="reference_source_selector" value="history"/>
-            <param name="input_fasta" value="Saccharomyces_mito.fa.gz" />
-            <param name="input" value="Saccharomyces_mito.gtf" />
+            <conditional name="input_type">
+                <param name="input_type_selector" value="gtf"/>
+                <param name="input" value="Saccharomyces_mito.gtf" />
+                <conditional name="reference_source">
+                    <param name="reference_source_selector" value="history"/>
+                    <param name="input_fasta" value="Saccharomyces_mito.fa.gz" />
+                </conditional>
+            </conditional>
             <output name="snpeff_output">
                 <assert_contents>
                     <has_text text="Saccharomyces_mito" />
--- a/snpEff_download.xml	Mon Nov 18 22:15:34 2024 +0000
+++ b/snpEff_download.xml	Sat Oct 04 17:04:38 2025 +0000
@@ -22,21 +22,20 @@
         <data name="snpeff_db" format="snpeffdb" label="@SNPEFF_VERSION@ ${genome_version} database"/>
     </outputs>
     <tests>
-        <test>
-            <param name="genome_version" value="ebola_zaire"/>
+        <test expect_failure="true">
+            <!-- The only meaningful test for this tool currently often, but not always, fails
+            when run from github because the download attempt from github gets blocked
+            by the data provider.
+            As a workaround we make the test fail consistently.
+            Put the "e" back on "zair" for an actual download attempt. -->
+            <param name="genome_version" value="ebola_zair"/>
+            <!-- then also uncomment the ouput assertion
             <output name="snpeff_db">
                 <assert_contents>
                     <has_text text="ebola_zaire" />
                 </assert_contents>
             </output>
-        </test>
-        <test>
-            <param name="genome_version" value="Bdellovibrio_bacteriovorus_hd100"/>
-            <output name="snpeff_db">
-                <assert_contents>
-                    <has_text text="Bdellovibrio_bacteriovorus_hd100" />
-                </assert_contents>
-            </output>
+            -->
         </test>
     </tests>
     <help><![CDATA[
--- a/snpEff_macros.xml	Mon Nov 18 22:15:34 2024 +0000
+++ b/snpEff_macros.xml	Sat Oct 04 17:04:38 2025 +0000
@@ -1,7 +1,19 @@
 <macros>
+  <!-- TOKENS TO BE UPDATED -->
   <token name="@TOOL_VERSION@">5.2</token>
-  <token name="@VERSION_SUFFIX@">0</token>
+  <token name="@VERSION_SUFFIX@">1</token>
   <token name="@SNPEFF_VERSION@">SnpEff5.2</token>
+  <!-- SnpEff versions are usually backwards compatible with a few older database versions.
+  The authoritative place to look up the compatibility scheme is DATABASE_COMPATIBLE_VERSIONS in Config.java of the upstream code.
+  Currently this would be:
+  https://github.com/pcingola/SnpEff/blob/master/src/main/java/org/snpeff/snpEffect/Config.java#L37-L52
+
+  The three following tokens define the list of compatible DB versions for the wrapper (for input validation), a human-readable string (for parameter help/labels) and a regex of the same versions (for filtering of data table records). -->
+  <token name="@COMPATIBLE_DB_VERSIONS@">['SnpEff5.0', 'SnpEff5.1', 'SnpEff5.2']</token>
+  <token name="@COMPATIBLE_DB_VERSIONS_STRING@">SnpEff 5.0 - 5.2</token>
+  <token name="@COMPATIBLE_DB_VERSIONS_REGEX@"><![CDATA[^SnpEff5\.[0-2]$]]></token>
+  <!-- End of TOKENS TO BE UPDATED -->
+
   <xml name="requirement">
       <requirement type="package" version="@TOOL_VERSION@">snpeff</requirement>
       <yield/>
--- a/snpeff_get_chr_names.xml	Mon Nov 18 22:15:34 2024 +0000
+++ b/snpeff_get_chr_names.xml	Sat Oct 04 17:04:38 2025 +0000
@@ -42,15 +42,15 @@
                 <param name="genomeVersion" type="select" label="Genome">
                     <help>This can only be used on built-in databases manually configured by your galaxy admin.</help>
                     <options from_data_table="snpeffv_genomedb">
-                            <filter type="static_value" name="SNPEFF_VERSION" value="@SNPEFF_VERSION@" column="1"/>
-                            <filter type="unique_value" column="2" />
+                        <filter type="regexp" column="1" value="@COMPATIBLE_DB_VERSIONS_REGEX@" />
+                        <filter type="unique_value" column="2" />
                     </options>
                 </param>
             </when>
             <when value="history">
                 <param name="snpeff_db" type="data" format="snpeffdb" label="@SNPEFF_VERSION@ Genome Data">
                     <help>This can only be used on databases in your history that were downloaded using the snpEff download tool.</help>
-                    <validator type="expression" message="This version of SnpEff will only work with @SNPEFF_VERSION@ genome databases">value is not None and value.metadata.snpeff_version == "@SNPEFF_VERSION@"</validator>
+                    <validator type="expression" message="This version of SnpEff will only work with @COMPATIBLE_DB_VERSIONS_STRING@ genome databases">value.metadata.snpeff_version in @COMPATIBLE_DB_VERSIONS@</validator>
                 </param>
             </when>
             <when value="named">
@@ -62,7 +62,7 @@
             <when value="custom">
                 <param name="snpeff_db" type="data" format="snpeffdb" label="@SNPEFF_VERSION@ Genome Data">
                     <help>This can only be used on databases in your history that were created using the snpEff build tool.</help>
-                    <validator type="expression" message="This version of SnpEff will only work with @SNPEFF_VERSION@ genome databases">value is not None and value.metadata.snpeff_version == "@SNPEFF_VERSION@"</validator>
+                    <validator type="expression" message="This version of SnpEff will only work with @COMPATIBLE_DB_VERSIONS_STRING@ genome databases">value.metadata.snpeff_version in @COMPATIBLE_DB_VERSIONS@</validator>
                 </param>
             </when>
         </conditional>
@@ -72,18 +72,22 @@
     </outputs>
     <tests>
         <test>
-            <param name="genomeSrc" value="named"/>
-            <param name="genome_version" value="Bacillus_subtilis_subsp_subtilis_str_168"/>
+            <conditional name="snpDb">
+                <param name="genomeSrc" value="cached"/>
+                <param name="genomeVersion" value="ebola_zaire"/>
+            </conditional>
             <output name="chr_names">
                 <assert_contents>
-                    <has_text text="Chromosome" />
-                    <has_text text="4215606" />
+                    <has_text text="KJ660346" />
+                    <has_text text="18959" />
                 </assert_contents>
             </output>
         </test>
         <test expect_failure="True">
-            <param name="genomeSrc" value="named"/>
-            <param name="genome_version" value="should_not_match"/>
+            <conditional name="snpDb">
+                <param name="genomeSrc" value="named"/>
+                <param name="genome_version" value="should_not_match"/>
+            </conditional>
         </test>
     </tests>
     <help><![CDATA[
Binary file test-data/dbs/ebola_zaire/snpEffectPredictor.bin has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/snpeffv_genomedb.loc	Sat Oct 04 17:04:38 2025 +0000
@@ -0,0 +1,5 @@
+## Downloaded Databases for SnpEff
+## These are from the list on: http://snpeff.sourceforge.net/download.html
+## the Description field in this sample is "Genome : Version"
+#Key	snpeff_version	Version	Description	data_dir	path
+SnpEff5.0_ebola_zaire	SnpEff5.0	ebola_zaire	Ebola : ebola_zaire	${__HERE__}/dbs
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/snpeffv_regulationdb.loc	Sat Oct 04 17:04:38 2025 +0000
@@ -0,0 +1,5 @@
+## Regulation Databases for SnpEff
+## These are from the list on: http://snpeff.sourceforge.net/download.html
+#Key	snpeff_version	genome	regulation_name description
+#SnpEff4.0_GRCh37.74	SnpEff4.0	GRCh37.74	CD4	CD4
+#SnpEff4.1_GRCh38.76	SnpEff4.1	GRCh38.76	CD4	CD4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Sat Oct 04 17:04:38 2025 +0000
@@ -0,0 +1,10 @@
+<tables>
+    <table name="snpeffv_genomedb" comment_char="#" allow_duplicate_entries="False">
+        <columns>key, version, value, name, path</columns>
+        <file path="${__HERE__}/test-data/snpeffv_genomedb.loc" />
+    </table>
+    <table name="snpeffv_regulationdb" comment_char="#" allow_duplicate_entries="False">
+        <columns>key, version, genome, value, name</columns>
+        <file path="${__HERE__}/test-data/snpeffv_regulationdb.loc" />
+    </table>
+</tables>