view sniffles.xml @ 0:93c4b04a0769 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc commit f5977355970ef4994957147d2d8a96fe6605e2b4"
author iuc
date Mon, 14 Sep 2020 07:39:07 +0000
parents
children 3f6f028f418f
line wrap: on
line source

<tool id="sniffles" name="sniffles" version="@TOOL_VERSION@+galaxy0">
    <description>Structural variation caller using third generation sequencing</description>
    <!-- @TOOL_VERSION@ is defined once here and reused by the tool's version attribute and by the package requirement. -->
    <macros>
        <token name="@TOOL_VERSION@">1.0.12</token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">sniffles</requirement>
    </requirements>
    <!-- Command used to report the version of the installed sniffles program. -->
    <version_command><![CDATA[sniffles --version]]></version_command>
    <command detect_errors="exit_code">
        <![CDATA[
## Link the BAM file and its index into the working directory under fixed
## names, so that the index is available as input.bam.bai next to input.bam.
ln -f -s '${input}' input.bam &&
ln -f -s '${input.metadata.bam_index}' input.bam.bai &&

sniffles
-t \${GALAXY_SLOTS:-2}
-m 'input.bam'
-v '$output'
## All numeric parameters are declared optional, so a flag is only written when
## a value was actually supplied. The string form is compared against the empty
## string (instead of testing truthiness) so that a legitimate value of 0 is
## still passed on, while a cleared field no longer leaves a flag without value.
## Boolean parameters expand to their flag or to nothing and need no guard.
## general_options
#if str($general_options.min_support) != '':
    --min_support $general_options.min_support
#end if
#if str($general_options.max_num_splits) != '':
    --max_num_splits $general_options.max_num_splits
#end if
#if str($general_options.max_distance) != '':
    --max_distance $general_options.max_distance
#end if
#if str($general_options.min_length) != '':
    --min_length $general_options.min_length
#end if
#if str($general_options.minmapping_qual) != '':
    --minmapping_qual $general_options.minmapping_qual
#end if
#if str($general_options.num_reads_report) != '':
    --num_reads_report $general_options.num_reads_report
#end if
#if str($general_options.min_seq_size) != '':
    --min_seq_size $general_options.min_seq_size
#end if
#if str($general_options.min_zmw) != '':
    --min_zmw $general_options.min_zmw
#end if
    $general_options.cs_string
## clustering_options
    $clustering_options.cluster
#if str($clustering_options.cluster_support) != '':
    --cluster_support $clustering_options.cluster_support
#end if
#if str($clustering_options.allelefreq) != '':
    --allelefreq $clustering_options.allelefreq
#end if
#if str($clustering_options.min_homo_af) != '':
    --min_homo_af $clustering_options.min_homo_af
#end if
#if str($clustering_options.min_het_af) != '':
    --min_het_af $clustering_options.min_het_af
#end if
## advanced_options
    $advanced_options.report_BND
    $advanced_options.not_report_seq
    $advanced_options.ignore_sd
    $advanced_options.ccs_reads
## parameter_estimation_options
    $parameter_estimation_options.skip_parameter_estimation
#if str($parameter_estimation_options.del_ratio) != '':
    --del_ratio $parameter_estimation_options.del_ratio
#end if
#if str($parameter_estimation_options.ins_ratio) != '':
    --ins_ratio $parameter_estimation_options.ins_ratio
#end if
#if str($parameter_estimation_options.max_diff_per_window) != '':
    --max_diff_per_window $parameter_estimation_options.max_diff_per_window
#end if
#if str($parameter_estimation_options.max_dist_aln_events) != '':
    --max_dist_aln_events $parameter_estimation_options.max_dist_aln_events
#end if
    ]]>
    </command>
    <inputs>
        <!-- The BAM index is taken from this dataset's metadata when the command line is built. -->
        <param type="data" name="input" format="bam" label="Input BAM file"/>
        <!-- Each section below maps one-to-one onto a group of sniffles command-line options;
             the value in square brackets at the end of each help text is the stated default. -->
        <section name="general_options" title="Set general options" expanded="False">
            <param argument="--min_support" type="integer" value="10" optional="true" min="1" label="Minimum Support" help="Minimum number of reads that support a SV. [10]" />
            <param argument="--max_num_splits" type="integer" value="7" optional="true" min="0" label="Maximum Number of Splits" help="Maximum number of splits per read to be still taken into account. [7]" />
            <param argument="--max_distance" type="integer" value="1000" optional="true" min="10" label="Maximum Distance" help="Maximum distance to group SV together. [1000]" />
            <param argument="--min_length" type="integer" value="30" optional="true" min="2" label="Minimum Length" help="Minimum length of SV to be reported. [30]"/>
            <param argument="--minmapping_qual" type="integer" value="20" optional="true" min="0" label="Minimum Mapping Quality" help="Minimum Mapping Quality. [20]"/>
            <param argument="--num_reads_report" type="integer" value="0" optional="true" min="-1" label="Number of Reads to Report" help="Report up to N reads that support the SV in the vcf file. -1: report all. [0]"/>
            <!-- Left empty on purpose: without a value the flag is not passed and sniffles applies its own default. -->
            <param argument="--min_seq_size" type="integer" value="" optional="true" label="Minimum Seq Size" help="Discard read if none of its segments is larger than this. [2000]"/>
            <param argument="--min_zmw" type="integer" value="0" optional="true" min="0" label="Minimum ZMW" help="Discard SV that are not supported by at least x zmws. This applies only for PacBio recognizable reads. [0]"/>
            <param argument="--cs_string" type="boolean" truevalue="--cs_string" falsevalue="" optional="true" label="Enable CS String" help="Enables the scan of CS string instead of Cigar and MD.  [false]"/>
        </section>
        <section name="clustering_options" title="Clustering/phasing and genotyping options" expanded="False">
            <param argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" optional="true" label="Cluster" help="Enables Sniffles to phase SVs that occur on the same reads [false]"/>
            <param argument="--cluster_support" type="integer" value="1" optional="true" min="1" label="Cluster Support" help="Minimum number of reads supporting clustering of SV. [1]"/>
            <param argument="--allelefreq" type="float" value="0" optional="true" min="0" label="Allele Frequency Threshold" help="Filters the SV calls based on the allele frequency. [0]" />
            <param argument="--min_homo_af" type="float" value="0.8" optional="true" min="0" max="1" label="Minimum Homogenous Allele Frequency" help="Minimum homogeneous threshold on allele frequency (0-1).  [0.8]"/>
            <param argument="--min_het_af" type="float" value="0.3" optional="true" min="0" max="1" label="Minimum Heterogeneous Allele Frequency" help="Minimum heterogeneous threshold on allele frequency (0-1).  [0.3]"/>
        </section>
        <section name="advanced_options" title="Advanced options" expanded="False">
            <!-- NOTE(review): the boolean params in this and the next section set their default through
                 value="..."; Galaxy documents "checked" as the default attribute for booleans. Confirm the
                 intended defaults are really applied, in particular for report_BND, whose help text says
                 [true] while the help table at the bottom of this file says "Default: False". -->
            <param argument="--report_BND" type="boolean" value="True" truevalue="--report_BND" falsevalue="" optional="true" label="Report BND" help="Report BND instead of Tra in vcf output.  [true]" />
            <param argument="--not_report_seq" type="boolean" value="False" truevalue="--not_report_seq" falsevalue="" optional="true" label="Don't Report Seq" help="Don't report sequences for indels in vcf output. (Beta version!)  [false]"/>
            <param argument="--ignore_sd" type="boolean" value="False" truevalue="--ignore_sd" falsevalue="" optional="true" label="Ignore sd" help="Ignores the sd based filtering.  [false]"/>
            <param argument="--ccs_reads" type="boolean" value="False" truevalue="--ccs_reads" falsevalue="" optional="true" label="CCS Reads" help="Preset CCS Pacbio setting. (Beta)  [false]" />
        </section>
        <section name="parameter_estimation_options" title="Parameter Estimation Options" expanded="False">
            <param argument="--skip_parameter_estimation" type="boolean" value="False" truevalue="--skip_parameter_estimation" falsevalue="" optional="true" label="Skip Parameter Estimation" help="Enables the scan if only very few reads are present.  [false]"/>
            <param argument="--del_ratio" type="float" value="0.0458369" optional="true" min="0" max="1" label="Estimated Deletion Ratio" help="Estimated ratio of deletions per read (0-1).  [0.0458369]" />
            <param argument="--ins_ratio" type="float" value="0.049379" optional="true" min="0" max="1" label="Estimated Insertion Ratio" help="Estimated ratio of insertions per read (0-1).  [0.049379]" />
            <param argument="--max_diff_per_window" type="integer" value="50" optional="true" min="0" label="Maximum Differences Per Window" help="Maximum differences per 100bp. [50]"/>
            <param argument="--max_dist_aln_events" type="integer" value="4" optional="true" min="0" label="Maximum Distance Between Alignment Events" help="Maximum distance between alignment (indel) events. [4]"/>
        </section>
    </inputs>
    <!-- Single VCF dataset; the command passes its path to sniffles through the -v option. -->
    <outputs>
        <data name="output"
              format="vcf"
              label="${tool.name} on ${on_string}"/>
    </outputs>
    <!-- Every test runs on the same small BAM and compares the VCF with a stored expectation,
         tolerating two differing lines. Parameters that live inside a section are set through
         the matching <section> element so the reference is unambiguous. The tool declares no
         "output_format" parameter, so the tests do not set one. -->
    <tests>
        <test> <!-- test 1 - standard run -->
            <param name="input" value="reads_region.bam"/>
            <output name="output" file="expected_output.vcf" lines_diff="2"/>
        </test>
        <test> <!-- test 2 - add reads into report -->
            <param name="input" value="reads_region.bam"/>
            <section name="general_options">
                <param name="num_reads_report" value="-1"/>
            </section>
            <output name="output" file="expected_output2.vcf" lines_diff="2"/>
        </test>
        <test> <!-- test 3 - use cs_string -->
            <param name="input" value="reads_region.bam"/>
            <section name="general_options">
                <param name="cs_string" value="true"/>
            </section>
            <output name="output" file="expected_outcome3.vcf" lines_diff="2"/>
        </test>
        <test> <!-- test 4 - clustering -->
            <param name="input" value="reads_region.bam"/>
            <section name="clustering_options">
                <param name="cluster" value="true"/>
            </section>
            <output name="output" file="expected_outcome4.vcf" lines_diff="2"/>
        </test>
        <test> <!-- test 5 - Advanced - Report BND -->
            <param name="input" value="reads_region.bam"/>
            <section name="advanced_options">
                <param name="report_BND" value="true"/>
            </section>
            <output name="output" file="expected_outcome5.vcf" lines_diff="2"/>
        </test>
        <test> <!-- test 6 - Parameter Estimation - skip -->
            <param name="input" value="reads_region.bam"/>
            <section name="parameter_estimation_options">
                <param name="skip_parameter_estimation" value="true"/>
            </section>
            <output name="output" file="expected_outcome6.vcf" lines_diff="2"/>
        </test>
    </tests>
    <help>
        <![CDATA[
########
Sniffles
########

What is Sniffles?
*****************
Sniffles is a structural variation (SV) caller for long reads. It is mainly designed for PacBio reads, but also works on Oxford Nanopore reads. SVs are larger events on the genome (e.g. deletions, duplications, insertions, inversions and translocations). Sniffles can detect all of these types and more, such as nested SVs (e.g. an inversion flanked by deletions or an inverted duplication). Furthermore, Sniffles incorporates multiple auto-tuning functions that determine data-set-dependent parameters to reduce the overall risk of falsely inferring SVs.

Quick Start
***********

Make sure you have a sorted BAM file from either ngmlr or bwa. For the latter, make sure you used the -M parameter for mapping to mark which alignments are primary and which are secondary! Note that you have to adjust the parameters for low-coverage cases.

Parameters
**********

General
-------

+---------------------------+-----------------------------------------------------------------------+
| Parameter                 | Description                                                           |
+===========================+=======================================================================+
| Minimum Support           | Minimum number of reads that support a SV to be reported. Default: 10 |
+---------------------------+-----------------------------------------------------------------------+
| Maximum Number of Splits  | Maximum number of split segments a read is aligned at before it is    |
|                           | ignored. Default: 7                                                   |
+---------------------------+-----------------------------------------------------------------------+
| Maximum Distance          | Maximum distance to group SV together. Sniffles estimates this        |
|                           | parameter during runtime to group together SVs reported by different  |
|                           | reads. Default: 1kb                                                   |
+---------------------------+-----------------------------------------------------------------------+
| Minimum Length            | Minimum length of SV to be reported. Default: 30bp                    |
+---------------------------+-----------------------------------------------------------------------+
| Minimum Mapping Quality   | Minimum mapping quality of alignment to be taken into account.        |
|                           | Default: 20                                                           |
+---------------------------+-----------------------------------------------------------------------+
| Number of Reads to Report | Number of read names to be reported that support the SV in the vcf    |
|                           | file. Default: 0                                                      |
+---------------------------+-----------------------------------------------------------------------+
| Minimum Seq Size          | Discard read if none of its segment is larger than this. Default: 2kb |
+---------------------------+-----------------------------------------------------------------------+
| Minimum ZMW               | Discard SV that are not supported by at least x zmws. This applies    |
|                           | only for PacBio recognizable reads. Default: 0                        |
+---------------------------+-----------------------------------------------------------------------+
| Enable CS String          | Enables the scan of CS string instead of Cigar and MD.  Default: False|
+---------------------------+-----------------------------------------------------------------------+

|

Clustering Options
------------------

+----------------------------------------+-----------------------------------------------------------------------+
| Parameter                              | Description                                                           |
+========================================+=======================================================================+
| Cluster                                | Performs read based phasing to mark SVs that occur together.          |
+----------------------------------------+-----------------------------------------------------------------------+
| Cluster Support                        | Minimum number of reads supporting clustering of SV. Default: 1       |
+----------------------------------------+-----------------------------------------------------------------------+
| Allele Frequency Threshold             | Filters the SV calls based on the allele frequency. Default: 0.0      |
+----------------------------------------+-----------------------------------------------------------------------+
| Minimum Homogenous Allele Frequency    | Minimum homogeneous threshold on allele frequency. Default: 0.8       |
+----------------------------------------+-----------------------------------------------------------------------+
| Minimum Heterogeneous Allele Frequency | Minimum heterogeneous threshold on allele frequency. Default: 0.3     |
+----------------------------------------+-----------------------------------------------------------------------+

|

Advanced Options
----------------

+----------------------------------------+------------------------------------------------------------------------+
| Parameter                              | Description                                                            |
+========================================+========================================================================+
| Report BND                             | Reports the inversions and translocations as BND events. Default: False|
+----------------------------------------+------------------------------------------------------------------------+
| Don't Report Seq                       | Don't report sequences for indels in vcf output. (Beta version!)       |
|                                        | Default: False                                                         |
+----------------------------------------+------------------------------------------------------------------------+
| Ignore sd                              | Ignores the sd based filtering. Default: False                         |
+----------------------------------------+------------------------------------------------------------------------+
| CCS Reads                              | Preset CCS Pacbio setting. (Beta) Default: False                       |
+----------------------------------------+------------------------------------------------------------------------+

|

Parameter Estimation Options
----------------------------

+----------------------------------------+------------------------------------------------------------------------+
| Parameter                              | Description                                                            |
+========================================+========================================================================+
| Skip Parameter Estimation              | Enables the scan if only very few reads are present. Default: False    |
+----------------------------------------+------------------------------------------------------------------------+
| Estimated Deletion Ratio               | Estimated ratio of deletions per read.  Default: 0.0458369             |
+----------------------------------------+------------------------------------------------------------------------+
| Estimated Insertion Ratio              | Estimated ratio of insertions per read. Default: 0.049379              |
+----------------------------------------+------------------------------------------------------------------------+
| Maximum Differences Per Window         | Maximum differences per 100bp. Default: 50                             |
+----------------------------------------+------------------------------------------------------------------------+
| Maximum Distance Between Alignment     | Maximum distance between alignment (indel) events. Default: 4          |
| Events                                 |                                                                        |
+----------------------------------------+------------------------------------------------------------------------+

|

Output
******

VCF Info field description

|

Sniffles reports multiple pieces of information in the Info field. The entries are delimited by ``;``:

|

+-------------------+------------------------------------------------------------------------------------------------------+
| IMPRECISE/PRECISE | Indicates the confidence of the exact breakpoint positions (bp).                                     |
+-------------------+------------------------------------------------------------------------------------------------------+
| CHR2=             | The chromosome of the second breakpoint of the SV reported.                                          |
+-------------------+------------------------------------------------------------------------------------------------------+
| END=              | The position (bp) of the second breakpoint of the SV reported.                                       |
+-------------------+------------------------------------------------------------------------------------------------------+
| ZMW=              | For PacBio based reads, shows the number of ZMW that support the SV.                                 |
+-------------------+------------------------------------------------------------------------------------------------------+
| SVTYPE=           | The type of the SV. (see Alt field above)                                                            |
+-------------------+------------------------------------------------------------------------------------------------------+
| SUPTYPE=          | Indicates what evidence supports the SVs (SR: Split Reads, AL: Alignment, NR: Noisy Region).         |
+-------------------+------------------------------------------------------------------------------------------------------+
| STD_quant_start=  | The standard deviation of the start breakpoints.                                                     |
+-------------------+------------------------------------------------------------------------------------------------------+
| STD_quant_stop=   | The standard deviation of the stop breakpoints.                                                      |
+-------------------+------------------------------------------------------------------------------------------------------+
| RNAMES=           | A comma separated list of read names that support the SV event. Controlled by -n Parameter.          |
+-------------------+------------------------------------------------------------------------------------------------------+
| SVLEN=            | Indicates the length of SVs.                                                                         |
+-------------------+------------------------------------------------------------------------------------------------------+
| STRANDS=          | Strand information at both breakpoints.                                                              |
+-------------------+------------------------------------------------------------------------------------------------------+
| SEQ=              | If reportable shows the sequence of the indels.                                                      |
+-------------------+------------------------------------------------------------------------------------------------------+
| RE=               | Number of reads supporting the variance.                                                             |
+-------------------+------------------------------------------------------------------------------------------------------+
| AF=               | Allele frequency (only if run with --genotype)                                                       |
+-------------------+------------------------------------------------------------------------------------------------------+

|

Source: https://github.com/fritzsedlazeck/Sniffles/wiki
    ]]>
    </help>
    <citations>
        <!-- DOI of the publication to cite when this tool is used. -->
        <citation type="doi">10.1038/s41592-018-0001-7</citation>
    </citations>
</tool>