diff sniffles.xml @ 1:3f6f028f418f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc commit d2d7bf4fbdd781458833134cee92c46ff6a4f603
author iuc
date Tue, 24 Sep 2024 19:30:03 +0000
parents 93c4b04a0769
children 09f5c6f3088a
line wrap: on
line diff
--- a/sniffles.xml	Mon Sep 14 07:39:07 2020 +0000
+++ b/sniffles.xml	Tue Sep 24 19:30:03 2024 +0000
@@ -1,8 +1,11 @@
-<tool id="sniffles" name="sniffles" version="@TOOL_VERSION@+galaxy0">
+<tool id="sniffles" name="sniffles" version="@TOOL_VERSION@+galaxy0" profile="23.0">
     <description>Structural variation caller using third generation sequencing</description>
     <macros>
-        <token name="@TOOL_VERSION@">1.0.12</token>
+        <token name="@TOOL_VERSION@">2.4</token>
     </macros>
+    <xrefs>
+        <xref type="bio.tools">sniffles</xref>
+    </xrefs>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">sniffles</requirement>
     </requirements>
@@ -13,82 +16,69 @@
         <![CDATA[
 ln -f -s '${input}' input.bam &&
 ln -f -s '${input.metadata.bam_index}' input.bam.bai &&
-
 sniffles
 -t \${GALAXY_SLOTS:-2}
--m 'input.bam'
+-i 'input.bam'
 -v '$output'
-## general_options
-#if $general_options.min_support:
-    --min_support $general_options.min_support
-#end if
-    --max_num_splits $general_options.max_num_splits
-#if $general_options.max_distance:
-    --max_distance $general_options.max_distance
-#end if
-#if $general_options.min_length:
-    --min_length $general_options.min_length
-#end if
-    --minmapping_qual $general_options.minmapping_qual
-    --num_reads_report $general_options.num_reads_report
-#if $general_options.min_seq_size:
-    --min_seq_size $general_options.min_seq_size
+## must set allow-overwrite since the new output vcf file exists
+--allow-overwrite
+#if $reference_genome.genome_type_select != "None":
+    #if $reference_genome.genome_type_select == "indexed":
+        --reference '${reference_genome.genome.fields.path}' 
+    #else:
+        --reference '${reference_genome.genome}' 
+    #end if
 #end if
-    --min_zmw $general_options.min_zmw
-    $general_options.cs_string
+## general_options
+    --minsupport '$general_options.minsupport'
+    --max-splits-kb '$general_options.maxsplitskb'
+    --minsvlen '$general_options.minsvlen'
+    --mapq '$general_options.mapq'
+    --min-alignment-length '$general_options.minalignmentlength'
 ## clustering_options
-    $clustering_options.cluster
-#if $clustering_options.cluster_support:
-    --cluster_support $clustering_options.cluster_support
-#end if
-    --allelefreq $clustering_options.allelefreq
-    --min_homo_af $clustering_options.min_homo_af
-    --min_het_af $clustering_options.min_het_af
-##advanced_options
-    $advanced_options.report_BND
-    $advanced_options.not_report_seq
-    $advanced_options.ignore_sd
-    $advanced_options.ccs_reads
-## parameter_estimation_options
-    $parameter_estimation_options.skip_parameter_estimation
-    --del_ratio $parameter_estimation_options.del_ratio
-    --ins_ratio $parameter_estimation_options.ins_ratio
-    --max_diff_per_window $parameter_estimation_options.max_diff_per_window
-    --max_dist_aln_events $parameter_estimation_options.max_dist_aln_events
+    --cluster-binsize '$clustering_options.clusterbinsize'
+    --cluster-r '$clustering_options.clusterr'
+## advanced_options 
+    $advanced_options.mosaic
     ]]>
     </command>
     <inputs>
         <param type="data" name="input" format="bam" label="Input BAM file"/>
+        <conditional name="reference_genome">
+            <param name="genome_type_select" type="select" label="Reference genome source is required for deletion SV sequence reporting" 
+              help="Optional: Select None, a built-in or history reference genome fasta">
+                <option value="None" selected="True">No reference fasta - do not report DEL SV sequence</option>
+                <option value="indexed">Use a Galaxy server built-in genome</option>
+                <option value="history">Use a genome fasta file from the current history</option>
+            </param>
+            <when value="None">
+                <param name="genome" type="hidden" value="None"/>
+            </when>
+            <when value="indexed">
+                <param name="genome" type="select" optional="false" label="Select a built in reference genome or custom genome" 
+                  help="If not listed, add a custom genome or use a reference genome from the history">
+                    <options from_data_table="all_fasta">
+                        <validator message="No genomes are available " type="no_options"/>
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome" type="data" format="fasta" optional="false" label="Select the reference genome fasta from the current history"/>
+            </when>
+        </conditional>
         <section name="general_options" title="Set general options" expanded="False">
-            <param argument="--min_support" type="integer" value="10" optional="true" min="1" label="Minimum Support" help="Minimum number of reads that support a SV. [10]" />
-            <param argument="--max_num_splits" type="integer" value="7" optional="true" min="0" label="Maximum Number of Splits" help="Maximum number of splits per read to be still taken into account. [7]" />
-            <param argument="--max_distance" type="integer" value="1000" optional="true" min="10" label="Maximum Distance" help="Maximum distance to group SV together. [1000]" />
-            <param argument="--min_length" type="integer" value="30" optional="true" min="2" label="Minimum Length" help="Minimum length of SV to be reported. [30]"/>
-            <param argument="--minmapping_qual" type="integer" value="20" optional="true" min="0" label="Minimum Mapping Quality" help="Minimum Mapping Quality. [20]"/>
-            <param argument="--num_reads_report" type="integer" value="0" optional="true" min="-1" label="Number of reads to report" help="Report up to N reads that support the SV in the vcf file. -1: report all. [0]"/>
-            <param argument="--min_seq_size" type="integer" value="" optional="true" label="Minimum Seq Size" help="Discard read if non of its segment is larger then this. [2000]"/>
-            <param argument="--min_zmw" type="integer" value="0" optional="true" min="0" label="Minimum ZMW" help="Discard SV that are not supported by at least x zmws. This applies only for PacBio recognizable reads. [0]"/>
-            <param argument="--cs_string" type="boolean" truevalue="--cs_string" falsevalue="" optional="true" label="Enable CS String" help="Enables the scan of CS string instead of Cigar and MD.  [false]"/>
+            <param argument="--minsupport" type="text" value="auto" label="Minimum Support" help="Minimum number of reads that support a SV. [auto]. Smaller support values -> more SV reported"/>
+            <param name="maxsplitskb" type="float" value="0.1" min="0" label="Maximum Number of Splits per KB" help="Additional number of splits per kilobase read sequence allowed before reads are ignored [0.1]" />
+            <param name="minsvlen" type="integer" value="50" min="2" label="Minimum Length" help="Minimum length of SV to be reported. [50]"/>
+            <param name="mapq" type="integer" value="20" min="0" label="Minimum Mapping Quality" help="Minimum Mapping Quality to consider. [20]"/>
+            <param name="minalignmentlength" type="integer" value="100" min="0" label="Minimum alignment length" help="Reads with alignments shorter than this length (in bp) will be ignored"/>
         </section>
         <section name="clustering_options" title="Clustering/phasing and genotyping options" expanded="False">
-            <param argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" optional="true" label="Cluster" help="Enables Sniffles to phase SVs that occur on the same reads [false]"/>
-            <param argument="--cluster_support" type="integer" value="1" optional="true" min="1" label="Cluster Support" help="Minimum number of reads supporting clustering of SV. [1]"/>
-            <param argument="--allelefreq" type="float" value="0" optional="true" min="0" label="Allele Frequency Threshold" help="Filters the SV calls based on the allele frequency. [0]" />
-            <param argument="--min_homo_af" type="float" value="0.8" optional="true" min="0" max="1" label="Minimum Homogenous Allele Frequency" help="Minimum homogeneous threshold on allele frequency (0-1).  [0.8]"/>
-            <param argument="--min_het_af" type="float" value="0.3" optional="true" min="0" max="1" label="Minimum Heterogeneous Allele Frequency" help="Minimum heterogeneous threshold on allele frequency (0-1).  [0.3]"/>
+            <param name="clusterbinsize" argument="--cluster-binsize" value="100" type="integer" min="0" label="Cluster bin size" help="Initial screening bin size [100]"/>
+            <param name="clusterr" argument="--cluster-r" type="float" value="2.5" min="0.0" label="Cluster Multiplier" help="Multiplier for SV start position standard deviation criterion in cluster merging [2.5]"/>
         </section>
         <section name="advanced_options" title="Advanced options" expanded="False">
-            <param argument="--report_BND" type="boolean" value="True" truevalue="--report_BND" falsevalue="" optional="true" label="Report BND" help="Report BND instead of Tra in vcf output.  [true]" />
-            <param argument="--not_report_seq" type="boolean" value="False" truevalue="--not_report_seq" falsevalue="" optional="true" label="Don't report seq" help="Don't report sequences for indels in vcf output. (Beta version!)  [false]"/>
-            <param argument="--ignore_sd" type="boolean" value="False" truevalue="--ignore_sd" falsevalue="" optional="true" label="Igonore sd" help="Ignores the sd based filtering.  [false]"/>
-            <param argument="--ccs_reads" type="boolean" value="False" truevalue="--ccs_reads" falsevalue="" optional="true" label="CCS Reads" help="Preset CCS Pacbio setting. (Beta)  [false]" />
-        </section>
-        <section name="parameter_estimation_options" title="Parameter Estimation Options" expanded="False">
-            <param argument="--skip_parameter_estimation" type="boolean" value="False" truevalue="--skip_parameter_estimation" falsevalue="" optional="true" label="Skip Parameter Estimation" help="Enables the scan if only very few reads are present.  [false]"/>
-            <param argument="--del_ratio" type="float" value="0.0458369" optional="true" min="0" max="1" label="Estimated Deletion Ratio" help="Estimated ratio of deletions per read (0-1).  [0.0458369]" />
-            <param argument="--ins_ratio" type="float" value="0.049379" optional="true" min="0" max="1" label="Estimated Insertion Ratio" help="Estimated ratio of insertions per read (0-1).  [0.049379]" />
-            <param argument="--max_diff_per_window" type="integer" value="50" optional="true" min="0" label="Maximum Differences Per Window" help="Maximum differences per 100bp. [50]"/>
-            <param argument="--max_dist_aln_events" type="integer" value="4" optional="true" min="0" label="Maximum Distance Between Alignment Events" help="Maximum distance between alignment (indel) events. [4]"/>
+            <param name="mosaic" type="boolean" value="False" truevalue="--mosaic" falsevalue="" label="Mosaic mode" help="Set Sniffles run mode to detect rare, somatic and mosaic SVs (default: False)" />
         </section>
     </inputs>
     <outputs>
@@ -97,38 +87,27 @@
     <tests>
         <test> <!-- test 1 - standard run -->
             <param name="input" value="reads_region.bam"/>
-            <param name="output_format" value="vcf"/>
-            <output name="output" file="expected_output.vcf" lines_diff="2"/>
+            <output name="output" file="expected_output.vcf" lines_diff="4"/>
         </test>
-        <test> <!-- test 2 - add reads into report -->
+        <test> <!-- test 2 - filter on mapq -->
             <param name="input" value="reads_region.bam"/>
-            <param name="output_format" value="vcf"/>
-            <param name="num_reads_report" value="-1"/>
-            <output name="output" file="expected_output2.vcf" lines_diff="2"/>
+            <param name="mapq" value="0"/>
+            <output name="output" file="expected_output2.vcf" lines_diff="4"/>
         </test>
-        <test> <!-- test 3 - use cs_string -->
+        <test> <!-- test 3 min support test -->
             <param name="input" value="reads_region.bam"/>
-            <param name="output_format" value="vcf"/>
-            <param name="cs_string" value="true"/>
-            <output name="output" file="expected_outcome3.vcf" lines_diff="2"/>
+            <param name="minsupport" value="1"/>
+            <output name="output" file="expected_outcome3.vcf" lines_diff="4"/>
         </test>
         <test> <!-- test 4 - clustering -->
             <param name="input" value="reads_region.bam"/>
-            <param name="output_format" value="vcf"/>
-            <param name="cluster" value="True"/>
-            <output name="output" file="expected_outcome4.vcf" lines_diff="2"/>
+            <param name="clusterbinsize" value="5"/>
+            <output name="output" file="expected_outcome4.vcf" lines_diff="4"/>
         </test>
-        <test> <!-- test 5 - Advanced - Report BND -->
+        <test> <!-- test 5 - Advanced - mosaic -->
             <param name="input" value="reads_region.bam"/>
-            <param name="output_format" value="vcf"/>
-            <param name="report_BND" value="True"/>
-            <output name="output" file="expected_outcome5.vcf" lines_diff="2"/>
-        </test>
-        <test> <!-- test 6 - Parameter Estimation - skip -->
-            <param name="input" value="reads_region.bam"/>
-            <param name="output_format" value="vcf"/>
-            <param name="skip_parameter_estimation" value="True"/>
-            <output name="output" file="expected_outcome6.vcf" lines_diff="2"/>
+            <param name="mosaic" value="--mosaic"/>
+            <output name="output" file="expected_outcome5.vcf" lines_diff="4"/>
         </test>
     </tests>
     <help>
@@ -139,12 +118,15 @@
 
 What is Sniffles?
 *****************
-Sniffles is a SV caller for long reads. It is mainly designed for PacBio reads, but also works on Oxford Nanopore reads. SV are larger events on the genome (e.g. deletions, duplications, insertions, inversions and translocations). Sniffles can detect all of these type and more such as nested SVs (e.g. inversion flanked by deletions or an inverted duplication). Furthermore, Sniffles incorporates multiple auto tuning functions to determine data set depending parameter to reduce the overall risk of falsely infer SVs.
+Sniffles is an SV caller for long reads. Sniffles2 accurately detects SVs at the germline, somatic and population level in PacBio and Oxford Nanopore read data.
+
+SVs are larger events on the genome (e.g. deletions, duplications, insertions, inversions and translocations).
+Sniffles can detect all of these types and more, such as nested SVs (e.g. inversion flanked by deletions or an inverted duplication).
 
-Quick Start
-***********
+Inputs
+******
 
-Make sure you have a sorted bam file either from ngmlr or from bwa. For the later make sure you have used -M parameter for mapping to mark which alignments are primary and which are secondary! Note you have to adjust the parameters for low coverage cases.
+Sniffles is known to work with BAM files produced by Minimap2 as input.
 
 Parameters
 **********
@@ -152,100 +134,56 @@
 General
 -------
 
+
 +---------------------------+-----------------------------------------------------------------------+
 | Parameter                 | Description                                                           |
 +===========================+=======================================================================+
-| Minimum Support           | Minimum number of reads that support a SV to be reported. Default: 10 |
-+---------------------------+-----------------------------------------------------------------------+
-| Maximum Number of Splits  | Maximum number of split segments a read is aligned at before it is    |
-|                           | ignored. Default: 7                                                   |
+| Minimum Support           | Minimum number of reads supporting a SV to be reported. Default: auto |
 +---------------------------+-----------------------------------------------------------------------+
-| Maximum Distance          | Maximum distance to group SV together. Sniffles estimates this        |
-|                           | parameter during runtime to group together SVs reported by different  |
-|                           | reads. Default: 1kb                                                   |
+| Maximum Number of Splits  | Maximum number of split segments per kb a read is aligned at before   |
+|                           | it is ignored. Default: 0.1                                           |
 +---------------------------+-----------------------------------------------------------------------+
-| Minimum Length            | Minimum length of SV to be reported. Default: 30bp                    |
+| Minimum SV Length         | Minimum length of SV to be reported. Default: 50bp                    |
 +---------------------------+-----------------------------------------------------------------------+
 | Minimum Mapping Quality   | Minimum mapping quality of alignment to be taken into account.        |
 |                           | Default: 20                                                           |
 +---------------------------+-----------------------------------------------------------------------+
-| Number of Reads to Report | Number of read names to be reported that support the SV in the vcf    |
-|                           | file. Default: 0                                                      |
-+---------------------------+-----------------------------------------------------------------------+
-| Minimum Seq Size          | Discard read if none of its segment is larger then this. Default: 2kb |
-+---------------------------+-----------------------------------------------------------------------+
-| Minimum ZMW               | Discard SV that are not supported by at least x zmws. This applies    |
-|                           | only for PacBio recognizable reads. Default: 0                        |
-+---------------------------+-----------------------------------------------------------------------+
-| Enable CS String          | Enables the scan of CS string instead of Cigar and MD.  Default: False|
+| Minimum alignment length  | Reads with a shorter aligned length are ignored. Default: 100         |
 +---------------------------+-----------------------------------------------------------------------+
 
-|
 
 Clustering Options
 ------------------
 
+
 +----------------------------------------+-----------------------------------------------------------------------+
 | Parameter                              | Description                                                           |
 +========================================+=======================================================================+
-| Cluster                                | Performs read based phasing to mark SVs that occur together.          |
-+----------------------------------------+-----------------------------------------------------------------------+
-| Cluster Support                        | Minimum number of reads supporting clustering of SV. Default: 1       |
+| Cluster bin size                       | Initial cluster bin size. Default 100                                 |
 +----------------------------------------+-----------------------------------------------------------------------+
-| Allele Frequency Threshold             | Filters the SV calls based on the allele frequency. Default: 0.0      |
-+----------------------------------------+-----------------------------------------------------------------------+
-| Minimum Homogenous Allele Frequency    | Minimum homogeneous threshold on allele frequency. Default: 0.8       |
-+----------------------------------------+-----------------------------------------------------------------------+
-| Minimum Heterogeneous Allele Frequency | Minimum heterogeneous threshold on allele frequency. Default: 0.3     |
+| Cluster Multiplier                     | Multiplier for SV start position standard deviation criterion in      |
+|                                        | cluster merging [2.5]                                                 |
 +----------------------------------------+-----------------------------------------------------------------------+
 
-|
 
 Advanced Options
 ----------------
 
-+----------------------------------------+------------------------------------------------------------------------+
-| Parameter                              | Description                                                            |
-+========================================+========================================================================+
-| Report BND                             | Reports the inversions and translocations as BND events. Default: False|
-+----------------------------------------+------------------------------------------------------------------------+
-| Don't Report Seq                       | Don't report sequences for indels in vcf output. (Beta version!)       |
-|                                        | Default: False                                                         |
-+----------------------------------------+------------------------------------------------------------------------+
-| Ignore sd                              | Ignores the sd based filtering. Default: False                         |
-+----------------------------------------+------------------------------------------------------------------------+
-| CCS Reads                              | Preset CCS Pacbio setting. (Beta) Default: False                       |
-+----------------------------------------+------------------------------------------------------------------------+
-
-|
 
-Parameter Estimation Options
-----------------------------
++----------------------------------------+------------------------------------------------------------------------------+
+| Parameter                              | Description                                                                  |
++========================================+==============================================================================+
+| Mosaic                                 | Set Sniffles run mode to detect rare, somatic and mosaic SVs (default: False)|
++----------------------------------------+------------------------------------------------------------------------------+
 
-+----------------------------------------+------------------------------------------------------------------------+
-| Parameter                              | Description                                                            |
-+========================================+========================================================================+
-| Skip Parameter Estimation              | Enables the scan if only very few reads are present. Default: False    |
-+----------------------------------------+------------------------------------------------------------------------+
-| Estimated Deletion Ratio               | Estimated ratio of deletions per read.  Default: 0.0458369             |
-+----------------------------------------+------------------------------------------------------------------------+
-| Estimated Insertion Ratio              | Estimated ratio of insertions per read. Default: 0.049379              |
-+----------------------------------------+------------------------------------------------------------------------+
-| Maximum Differences Per Window         | Maximum differences per 100bp. Default: 50                             |
-+----------------------------------------+------------------------------------------------------------------------+
-| Maximum Distance Between Alignment     | Maximum distance between alignment (indel) events. Default: 4          |
-| Events                                 |                                                                        |
-+----------------------------------------+------------------------------------------------------------------------+
-
-|
 
 Output
 ******
 
 VCF Info field description
-|
+
 Sniffles report multiple information in the Info field. The entries are delimited by:
-|
+
 +-------------------+------------------------------------------------------------------------------------------------------+
 | IMPRECISE/PRECISE | Indicates the confidence of the exact breakpoint positions (bp).                                     |
 +-------------------+------------------------------------------------------------------------------------------------------+
@@ -275,11 +213,11 @@
 +-------------------+------------------------------------------------------------------------------------------------------+
 | AF=               | Allele frequency (only if run with –genotype)                                                        |
 +-------------------+------------------------------------------------------------------------------------------------------+
-|
-Source: https://github.com/fritzsedlazeck/Sniffles/wiki
+
+Source: https://github.com/fritzsedlazeck/Sniffles
     ]]>
     </help>
     <citations>
-        <citation type="doi">10.1038/s41592-018-0001-7</citation>
+        <citation type="doi">10.1038/s41587-023-02024-y</citation>
     </citations>
 </tool>