changeset 5:fd52f65372c9 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/deepvariant commit bf3bb5d5a1f442208f054523fbcf0be4cc366a35
author iuc
date Mon, 02 Feb 2026 12:34:14 +0000
parents 63b68fe4af85
children
files deepvariant.xml macros.xml test-data/phix174.par_regions.bed
diffstat 3 files changed, 213 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/deepvariant.xml	Tue Mar 05 08:10:46 2024 +0000
+++ b/deepvariant.xml	Mon Feb 02 12:34:14 2026 +0000
@@ -1,4 +1,4 @@
-<tool id='deepvariant' name='DeepVariant' version='@TOOL_VERSION@+galaxy@SUFFIX_VERSION@' profile='20.01'>
+<tool id='deepvariant' name='DeepVariant' version='@TOOL_VERSION@+galaxy@SUFFIX_VERSION@' profile='@PROFILE@'>
     <description>deep learning-based variant caller</description>
     <macros>
         <import>macros.xml</import>
@@ -11,6 +11,14 @@
         #if $regions_conditional.regions_option == 'bed'
             && ln -s '${regions_conditional.bed_file}' region.bed
         #end if
+        #if $advanced_options.par_regions_bed
+            && ln -s '${advanced_options.par_regions_bed}' par_regions.bed
+        #end if
+        ## advanced_options parameters are referenced by their full path; the logging directory is only created for the runtime report.
+        #set $logging_dir_value = 'logging' if $advanced_options.create_runtime_report else None
+        #if $logging_dir_value
+            && mkdir -p '$logging_dir_value'
+        #end if
         #if $reference_genome.source == 'history':
             #set $ref_genome = 'reference.fasta'
             && ln -s -f '${reference_genome.history_item}' $ref_genome
@@ -21,17 +29,43 @@
         && run_deepvariant
         --model_type=$model_type
         --ref=$ref_genome
-        --reads=reads_alignment.bam
+        --reads='reads_alignment.bam'
+        #if $sample_name
+            --sample_name '$sample_name'
+        #end if
         --output_vcf='./output.vcf.gz'
         #if $output_gvcf
             --output_gvcf='./output.g.vcf.gz'
         #end if
         #if $regions_conditional.regions_option == 'region'
-            --regions $regions_conditional.region_literal
+            --regions '$regions_conditional.region_literal'
         #else if $regions_conditional.regions_option == 'bed'
-            --regions region.bed
+            --regions 'region.bed'
+        #end if
+        ## Parameters declared in the advanced_options and expert_options sections are referenced by their full path (section.param).
+        --disable_small_model=$advanced_options.disable_small_model
+        #if $advanced_options.haploid_contigs
+            --haploid_contigs='$advanced_options.haploid_contigs'
+        #end if
+        #if $advanced_options.par_regions_bed
+            --par_regions_bed='par_regions.bed'
+        #end if
+        #if $logging_dir_value
+            --logging_dir='$logging_dir_value'
         #end if
-        ##--call_variants_extra_args="use_openvino=true" ## Setting this will use OpenVINO on Intel CPUs, which empirically reduces call_variants runtime by 15%-25%.
+        #if $advanced_options.report_title
+            --report_title='$advanced_options.report_title'
+        #end if
+        --runtime_report=$advanced_options.create_runtime_report $advanced_options.vcf_stats_report
+        #if $expert_options.make_examples_extra_args
+            --make_examples_extra_args='$expert_options.make_examples_extra_args'
+        #end if
+        #if $expert_options.call_variants_extra_args
+            --call_variants_extra_args='$expert_options.call_variants_extra_args'
+        #end if
+        #if $expert_options.postprocess_variants_extra_args
+            --postprocess_variants_extra_args='$expert_options.postprocess_variants_extra_args'
+        #end if
         --num_shards=\${GALAXY_SLOTS:-2}
         && gunzip './output.vcf.gz'
         #if $output_gvcf
@@ -57,6 +91,7 @@
             </when>
         </conditional>
         <param argument="--reads" type="data" format="bam" label="BAM file" help="An aligned reads file in BAM format. The reads must be aligned to the reference genome" />
+        <param argument="--sample_name" type="text" optional="true" label="Sample name" help="Sample name to use instead of the SM tag in the BAM header. Example: NA12878" />
         <param argument="--model_type" type="select" label="Model type" help="Type of model to use for variant calling">
             <option value="WGS">WGS: Illumina whole genome sequencing</option>
             <option value="WES">WES: Illumina whole exome sequencing</option>
@@ -88,10 +123,28 @@
             </when>
         </conditional>
         <param argument="--output_gvcf" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Generate genomic VCF (gVCF) output" help="The key difference between a regular VCF and a gVCF is that the gVCF has records for all sites, whether there is a variant call there or not. The goal is to have every site represented in the file in order to do joint analysis of a cohort in subsequent steps" />
+        <section name="advanced_options" title="Advanced options" expanded="false">
+            <param argument="--disable_small_model" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Disable small model" help="Disable the use of the small model to call variants during make_examples." />
+            <param argument="--haploid_contigs" type="text" optional="true" label="Haploid contigs" help="Comma-separated list of non-autosomal chromosomes. Example: chrX,chrY" />
+            <param argument="--par_regions_bed" type="data" format="bed" optional="true" label="PAR regions BED file" help="BED file of pseudoautosomal regions." />
+            <param argument="--report_title" type="text" optional="true" label="Report title" help="Title for the VCF stats report (HTML). Example: NA12878 DeepVariant report" />
+            <param argument="--vcf_stats_report" type="boolean" truevalue="--vcf_stats_report=True" falsevalue="--vcf_stats_report=False" checked="False" label="Create VCF stats report (HTML)" help="Output a visual report (HTML) of statistics about the output VCF." />
+            <param name="create_runtime_report" argument="--runtime_report" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Create runtime report" help="Output make_examples runtime metrics and create a visual runtime report." />
+        </section>
+        <section name="expert_options" title="Expert options (may increase runtime and disk usage)" expanded="false">
+            <param argument="--make_examples_extra_args" type="text" optional="true" label="make_examples extra args" help="Comma-separated flag_name=flag_value for make_examples.py. Example: min_base_quality=10,min_mapping_quality=5" />
+            <param argument="--call_variants_extra_args" type="text" optional="true" label="call_variants extra args" help="Comma-separated flag_name=flag_value for call_variants.py. Example: allow_empty_examples=true,batch_size=1024" />
+            <param argument="--postprocess_variants_extra_args" type="text" optional="true" label="postprocess_variants extra args" help="Comma-separated flag_name=flag_value for postprocess_variants.py. Example: cnn_homref_call_min_gq=20.0" />
+        </section>
     </inputs>
     <outputs>
         <data name="vcf_file" format="vcf" from_work_dir="output.vcf" label="${tool.name} on ${on_string}: VCF file"/>
-        <data name="html_report" format="html" from_work_dir="output.visual_report.html" label="${tool.name} on ${on_string}: HTML report"/>
+        <data name="html_report" format="html" from_work_dir="output.visual_report.html" label="${tool.name} on ${on_string}: HTML report">
+            <filter>advanced_options['vcf_stats_report']</filter>
+        </data>
+        <data name="runtime_report" format="html" from_work_dir="logging/make_examples_runtime_by_region_report.html" label="${tool.name} on ${on_string}: runtime report">
+            <filter>advanced_options['create_runtime_report']</filter>
+        </data>
         <data name="gvcf_file" format="vcf" from_work_dir="output.g.vcf" label="${tool.name} on ${on_string}: gVCF file">
             <filter>output_gvcf</filter>
         </data>
@@ -108,15 +161,21 @@
             <conditional name="regions_conditional">
                 <param name="regions_option" value="disabled"/>
             </conditional>
-            <output name="vcf_file" file="output.vcf" ftype="vcf">
+            <section name="advanced_options">
+                <param name="vcf_stats_report" value="True"/>
+                <param name="report_title" value="Test Report Title"/>
+            </section>
+            <output name="vcf_file" ftype="vcf">
                 <assert_contents>
                     <has_text text="##fileformat=VCFv4.2"/>
-                    <has_size value="2478" delta="10"/>
+                    <has_text text="#CHROM"/>
                 </assert_contents>
             </output>
-            <output name="html_report" file="report.html" ftype="html">
+            <output name="html_report" ftype="html">
                 <assert_contents>
-                    <has_size value="23176" delta="100"/>
+                    <is_valid_xml />
+                    <has_n_lines n="34" delta="10" />
+                    <has_text text="Test Report Title"/>
                 </assert_contents>
             </output>
         </test>
@@ -132,15 +191,20 @@
                 <param name="regions_option" value="region"/>
                 <param name="region_literal" value="K03455:1-2669"/>
             </conditional>
+            <section name="advanced_options">
+                <param name="vcf_stats_report" value="True"/>
+            </section>
             <output name="vcf_file" ftype="vcf">
                 <assert_contents>
+                    <has_text text="#CHROM"/>
                     <has_text text="##fileformat=VCFv4.2"/>
-                    <has_size value="1843" delta="10"/>
                 </assert_contents>
             </output>
             <output name="html_report" ftype="html">
                 <assert_contents>
-                    <has_size value="18894" delta="100"/>
+                    <is_valid_xml />
+                    <has_n_lines n="34" delta="10" />
+                    <has_text text="Variant types"/>
                 </assert_contents>
             </output>
         </test>
@@ -156,15 +220,20 @@
                 <param name="regions_option" value="bed"/>
                 <param name="bed_file" value="region.bed" ftype="bed"/>
             </conditional>
+            <section name="advanced_options">
+                <param name="vcf_stats_report" value="True"/>
+            </section>
             <output name="vcf_file" ftype="vcf">
                 <assert_contents>
+                    <has_text text="#CHROM"/>
                     <has_text text="##fileformat=VCFv4.2"/>
-                    <has_size value="1843" delta="10"/>
                 </assert_contents>
             </output>
             <output name="html_report" ftype="html">
                 <assert_contents>
-                    <has_size value="18894" delta="100"/>
+                    <is_valid_xml />
+                    <has_n_lines n="34" delta="10" />
+                    <has_text text="Variant types"/>
                 </assert_contents>
             </output>
         </test>
@@ -181,21 +250,26 @@
                 <param name="regions_option" value="region"/>
                 <param name="region_literal" value="K03455:1-2669"/>
             </conditional>
+            <section name="advanced_options">
+                <param name="vcf_stats_report" value="True"/>
+            </section>
             <output name="vcf_file" ftype="vcf">
                 <assert_contents>
+                    <has_text text="#CHROM"/>
                     <has_text text="##fileformat=VCFv4.2"/>
-                    <has_size value="1843" delta="10"/>
                 </assert_contents>
             </output>
-            <output name="gvcf_file" file="output.g.vcf" ftype="vcf">
+            <output name="gvcf_file" ftype="vcf">
                 <assert_contents>
+                    <has_text text="#CHROM"/>
                     <has_text text="##fileformat=VCFv4.2"/>
-                    <has_size value="3192" delta="10" />
                 </assert_contents>
             </output>
             <output name="html_report" ftype="html">
                 <assert_contents>
-                    <has_size value="18894" delta="100"/>
+                    <is_valid_xml />
+                    <has_n_lines n="34" delta="10" />
+                    <has_text text="Variant types"/>
                 </assert_contents>
             </output>
         </test>
@@ -210,15 +284,20 @@
             <conditional name="regions_conditional">
                 <param name="regions_option" value="disabled"/>
             </conditional>
+            <section name="advanced_options">
+                <param name="vcf_stats_report" value="True"/>
+            </section>
             <output name="vcf_file" ftype="vcf">
                 <assert_contents>
+                    <has_text text="#CHROM"/>
                     <has_text text="##fileformat=VCFv4.2"/>
-                    <has_size value="2478"/>
                 </assert_contents>
             </output>
             <output name="html_report" ftype="html">
                 <assert_contents>
-                    <has_size value="23176" delta="100"/>
+                    <is_valid_xml />
+                    <has_n_lines n="34" delta="10" />
+                    <has_text text="Variant types"/>
                 </assert_contents>
             </output>
         </test>
@@ -233,15 +312,118 @@
             <conditional name="regions_conditional">
                 <param name="regions_option" value="disabled"/>
             </conditional>
+            <section name="advanced_options">
+                <param name="vcf_stats_report" value="True"/>
+            </section>
             <output name="vcf_file" ftype="vcf">
                 <assert_contents>
+                    <has_text text="#CHROM"/>
                     <has_text text="##fileformat=VCFv4.2"/>
-                    <has_size value="2478"/>
+                </assert_contents>
+            </output>
+            <output name="html_report" ftype="html">
+                <assert_contents>
+                    <is_valid_xml />
+                    <has_n_lines n="34" delta="10" />
+                    <has_text text="Variant types"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test runtime report output: enabling create_runtime_report adds a third output (runtime_report) -->
+        <test expect_num_outputs="3">
+            <conditional name="reference_genome">
+                <param name="source" value="history"/>
+                <param name="history_item" value="reference.fasta"/>
+            </conditional>
+            <param name="reads" value="reads.bam"/>
+            <param name="model_type" value="WGS"/>
+            <conditional name="regions_conditional">
+                <param name="regions_option" value="disabled"/>
+            </conditional>
+            <section name="advanced_options">
+                <param name="vcf_stats_report" value="True"/>
+                <param name="create_runtime_report" value="True"/>
+            </section>
+            <output name="vcf_file" ftype="vcf">
+                <assert_contents>
+                    <has_text text="#CHROM"/>
+                    <has_text text="##fileformat=VCFv4.2"/>
                 </assert_contents>
             </output>
             <output name="html_report" ftype="html">
                 <assert_contents>
-                    <has_size value="23176" delta="100"/>
+                    <is_valid_xml />
+                    <has_n_lines n="34" delta="10" />
+                    <has_text text="Variant types"/>
+                </assert_contents>
+            </output>
+            <output name="runtime_report" ftype="html">
+                <assert_contents>
+                    <is_valid_xml />
+                    <has_text text="Overall runtime"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test haploid contigs together with a PAR regions BED file -->
+        <test expect_num_outputs="2">
+            <conditional name="reference_genome">
+                <param name="source" value="history"/>
+                <param name="history_item" value="reference.fasta"/>
+            </conditional>
+            <param name="reads" value="reads.bam"/>
+            <param name="model_type" value="WGS"/>
+            <conditional name="regions_conditional">
+                <param name="regions_option" value="disabled"/>
+            </conditional>
+            <section name="advanced_options">
+                <param name="vcf_stats_report" value="True"/>
+                <param name="haploid_contigs" value="K03455"/>
+                <param name="par_regions_bed" value="phix174.par_regions.bed" ftype="bed"/>
+            </section>
+            <output name="vcf_file" ftype="vcf">
+                <assert_contents>
+                    <has_text text="#CHROM"/>
+                    <has_text text="##fileformat=VCFv4.2"/>
+                </assert_contents>
+            </output>
+            <output name="html_report" ftype="html">
+                <assert_contents>
+                    <is_valid_xml />
+                    <has_n_lines n="34" delta="10" />
+                    <has_text text="Variant types"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test expert options: extra args for make_examples, call_variants and postprocess_variants -->
+        <test expect_num_outputs="2">
+            <conditional name="reference_genome">
+                <param name="source" value="history"/>
+                <param name="history_item" value="reference.fasta"/>
+            </conditional>
+            <param name="reads" value="reads.bam"/>
+            <param name="model_type" value="WGS"/>
+            <conditional name="regions_conditional">
+                <param name="regions_option" value="disabled"/>
+            </conditional>
+            <section name="advanced_options">
+                <param name="vcf_stats_report" value="True"/>
+            </section>
+            <section name="expert_options">
+                <param name="make_examples_extra_args" value="min_base_quality=10"/>
+                <param name="call_variants_extra_args" value="allow_empty_examples=true"/>
+                <param name="postprocess_variants_extra_args" value="cnn_homref_call_min_gq=20.0"/>
+            </section>
+            <output name="vcf_file" ftype="vcf">
+                <assert_contents>
+                    <has_text text="#CHROM"/>
+                    <has_text text="##fileformat=VCFv4.2"/>
+                </assert_contents>
+            </output>
+            <output name="html_report" ftype="html">
+                <assert_contents>
+                    <is_valid_xml />
+                    <has_n_lines n="34" delta="10" />
+                    <has_text text="Variant types"/>
                 </assert_contents>
             </output>
         </test>
@@ -254,11 +436,13 @@
 
 DeepVariant is a deep learning-based variant caller that takes aligned reads (in BAM or CRAM format), produces pileup image tensors from them, classifies each tensor using a convolutional neural network, and finally reports the results in a standard VCF or gVCF file.
 
+DeepVariant runs a pipeline of three steps: **make_examples**, **call_variants**, and **postprocess_variants**. The **Expert options** section exposes an extra-args field for each step. Use these only with flags that are accepted by the corresponding DeepVariant binary (see ``run_deepvariant --helpfull`` and the per-step help, e.g. ``/opt/deepvariant/bin/make_examples --helpfull`` in the container).
+
 DeepVariant supports germline variant-calling in diploid organisms.
 
-- NGS (Illumina) data for either a `whole genome <https://github.com/google/deepvariant/blob/r1.2/docs/deepvariant-case-study.md>`_ or `whole exome <https://github.com/google/deepvariant/blob/r1.2/docs/deepvariant-exome-case-study.md>`_.
-- PacBio HiFi data, see the `PacBio case study <https://github.com/google/deepvariant/blob/r1.2/docs/deepvariant-pacbio-model-case-study.md>`_.
-- Hybrid PacBio HiFi + Illumina WGS, see the `hybrid case study <https://github.com/google/deepvariant/blob/r1.2/docs/deepvariant-hybrid-case-study.md>`_.
+- NGS (Illumina) data for either a `whole genome <https://github.com/google/deepvariant/blob/r1.9/docs/deepvariant-case-study.md>`_ or `whole exome <https://github.com/google/deepvariant/blob/r1.9/docs/deepvariant-exome-case-study.md>`_.
+- PacBio HiFi data, see the `PacBio case study <https://github.com/google/deepvariant/blob/r1.9/docs/deepvariant-pacbio-model-case-study.md>`_.
+- Hybrid PacBio HiFi + Illumina WGS, see the `hybrid case study <https://github.com/google/deepvariant/blob/r1.9/docs/deepvariant-hybrid-case-study.md>`_.
 
 Please also note:
 
--- a/macros.xml	Tue Mar 05 08:10:46 2024 +0000
+++ b/macros.xml	Mon Feb 02 12:34:14 2026 +0000
@@ -1,6 +1,7 @@
 <macros>
-    <token name="@TOOL_VERSION@">1.5.0</token>
-    <token name="@SUFFIX_VERSION@">1</token>
+    <token name="@TOOL_VERSION@">1.9.0</token>
+    <token name="@SUFFIX_VERSION@">0</token>
+    <token name="@PROFILE@">23.1</token>
     <xml name="edam_ontology">
         <edam_topics>                                                                                  
             <edam_topic>topic_0199</edam_topic>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phix174.par_regions.bed	Mon Feb 02 12:34:14 2026 +0000
@@ -0,0 +1,1 @@
+K03455	0	100