view sniffles.xml @ 4:43fffeed243f draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc commit 03fbb13717809f9198ab113192d241599705ef7b
author iuc
date Sun, 29 Sep 2024 10:23:36 +0000
parents 09f5c6f3088a
children
line wrap: on
line source

<tool id="sniffles" name="sniffles" version="@TOOL_VERSION@+galaxy1" profile="23.0">
    <description>Structural variation caller using third generation sequencing</description>
    <!-- @TOOL_VERSION@ is reused by the tool's version attribute and by the package requirement below -->
    <macros>
        <token name="@TOOL_VERSION@">2.4</token>
    </macros>
    <!-- Cross-reference to the bio.tools registry entry for this tool -->
    <xrefs>
        <xref type="bio.tools">sniffles</xref>
    </xrefs>
    <!-- Package dependency, pinned to the same version as the wrapper -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">sniffles</requirement>
    </requirements>
    <!-- Command used to report the version of the underlying sniffles executable -->
    <version_command>
        <![CDATA[ sniffles --version ]]>
    </version_command>
    <command detect_errors="exit_code">
        <![CDATA[
## Link the BAM and its index into the working directory as input.bam and input.bam.bai
ln -f -s '${input}' input.bam &&
ln -f -s '${input.metadata.bam_index}' input.bam.bai &&
## Link the optional reference fasta (built-in data table entry or history dataset) as reference.fa
#if str($reference_genome.genome_type_select) != "None":
    #if str($reference_genome.genome_type_select) == "indexed":
        ln -f -s '${reference_genome.genome.fields.path}' 'reference.fa' &&
    #else:
        ln -f -s '${reference_genome.genome}' 'reference.fa' &&
    #end if
#end if
sniffles
-t \${GALAXY_SLOTS:-2}
-i 'input.bam'
-v '$output'
## must set allow-overwrite since the new output vcf file exists
--allow-overwrite
#if str($reference_genome.genome_type_select) != "None":
   --reference 'reference.fa'
#end if
## general_options
    --minsupport '$general_options.minsupport'
    --max-splits-kb '$general_options.maxsplitskb'
    --minsvlen '$general_options.minsvlen'
    --mapq '$general_options.mapq'
    --min-alignment-length '$general_options.minalignmentlength'
## detectlargeins is declared inside the general_options section, so it is addressed through that section
## the flag is only emitted when the user switches detection off
    #if str($general_options.detectlargeins) == "0":
        --detect-large-ins '0'
    #end if
## clustering_options
    --cluster-binsize '$clustering_options.clusterbinsize'
    --cluster-r '$clustering_options.clusterr'
## advanced_options
## expands to the mosaic flag or to an empty string, depending on the boolean
    $advanced_options.mosaic
    ]]>
    </command>
    <inputs>
        <!-- Alignments to call SVs from; the command also uses this dataset's bam_index metadata -->
        <param type="data" name="input" format="bam" label="Input BAM file"/>
        <!-- Optional reference: none, an entry from the all_fasta data table, or a fasta from the history -->
        <conditional name="reference_genome">
            <param name="genome_type_select" type="select" label="Reference genome source is required for deletion SV sequence reporting"
              help="Optional: Select None, a built-in or history reference genome fasta">
                <option value="None" selected="True">No reference fasta - do not report DEL SV sequence</option>
                <option value="indexed">Use a Galaxy server built-in genome</option>
                <option value="history">Use a genome fasta file from the current history</option>
            </param>
            <when value="None">
                <param name="genome" type="hidden" value="None"/>
            </when>
            <when value="indexed">
                <param name="genome" type="select" optional="false" label="Select a built in reference genome or custom genome"
                  help="If not listed, add a custom genome or use a reference genome from the history">
                    <options from_data_table="all_fasta">
                        <validator message="No genomes are available " type="no_options"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="genome" type="data" format="fasta" optional="false" label="Select the reference genome fasta from the current history"/>
            </when>
        </conditional>
        <!--
            Where the parameter name differs from the name Galaxy would derive from the flag,
            "name" is given explicitly so existing parameter names stay stable while
            "argument" shows the flag that the command section actually passes.
        -->
        <section name="general_options" title="Set general options" expanded="False">
            <param argument="--minsupport" type="text" value="auto" label="Minimum Support" help="Minimum number of reads that support a SV. [auto]. Smaller support values -> more SV reported">
                <!-- Free text because "auto" is allowed; reject anything that is neither "auto" nor a whole number -->
                <validator type="regex" message="Enter 'auto' or a whole number of reads">^(auto|[0-9]+)$</validator>
            </param>
            <param name="maxsplitskb" argument="--max-splits-kb" type="float" value="0.1" min="0" label="Maximum Number of Splits per KB" help="Additional number of splits per kilobase read sequence allowed before reads are ignored [0.1]"/>
            <param argument="--minsvlen" type="integer" value="50" min="2" label="Minimum Length" help="Minimum length of SV to be reported. [50]"/>
            <param argument="--mapq" type="integer" value="20" min="0" label="Minimum Mapping Quality" help="Minimum Mapping Quality to consider. [20]"/>
            <param name="minalignmentlength" argument="--min-alignment-length" type="integer" value="100" min="0" label="Minimum alignment length" help="Reads with alignments shorter than this length (in bp) will be ignored"/>
            <!-- The command only emits the flag (with value 0) when this is switched off -->
            <param name="detectlargeins" argument="--detect-large-ins" type="boolean" truevalue="1" falsevalue="0" display="radio" checked="true" label="Detect very large insertions spanning multiple reads"
               help="This sometimes shows enormous features."/>
        </section>
        <section name="clustering_options" title="Clustering/phasing and genotyping options" expanded="False">
            <param name="clusterbinsize" argument="--cluster-binsize" value="100" type="integer" min="0" label="Cluster bin size" help="Initial screening bin size [100]"/>
            <param name="clusterr" argument="--cluster-r" type="float" value="2.5" min="0.0" label="Cluster Multiplier" help="Multiplier for SV start position standard deviation criterion in cluster merging [2.5]"/>
        </section>
        <section name="advanced_options" title="Advanced options" expanded="False">
            <!-- The true value is the literal flag, which the command section inserts verbatim -->
            <param argument="--mosaic" type="boolean" display="radio" checked="false" truevalue="--mosaic" falsevalue="" label="Mosaic mode" help="Set Sniffles run mode to detect rare, somatic and mosaic SVs (default: False)"/>
        </section>
    </inputs>
    <!-- Single VCF dataset; the command section writes it through the -v option -->
    <outputs>
        <data name="output" format="vcf" label="${tool.name} on ${on_string}"/>
    </outputs>
    <!--
        Every test runs on the same small BAM and compares the VCF against a stored
        expectation, tolerating up to four differing lines.
        Parameters that live in a section are nested under that section.
    -->
    <tests>
        <test expect_num_outputs="1"> <!-- test 1 - large insertion detection switched off -->
            <param name="input" value="reads_region.bam"/>
            <section name="general_options">
                <param name="detectlargeins" value="0"/>
            </section>
            <output name="output" file="expected_output.vcf" lines_diff="4"/>
        </test>
        <test expect_num_outputs="1"> <!-- test 2 - filter on mapq -->
            <param name="input" value="reads_region.bam"/>
            <section name="general_options">
                <param name="mapq" value="0"/>
            </section>
            <output name="output" file="expected_output2.vcf" lines_diff="4"/>
        </test>
        <test expect_num_outputs="1"> <!-- test 3 - min support test -->
            <param name="input" value="reads_region.bam"/>
            <section name="general_options">
                <param name="minsupport" value="1"/>
            </section>
            <output name="output" file="expected_outcome3.vcf" lines_diff="4"/>
        </test>
        <test expect_num_outputs="1"> <!-- test 4 - clustering -->
            <param name="input" value="reads_region.bam"/>
            <section name="clustering_options">
                <param name="clusterbinsize" value="5"/>
            </section>
            <output name="output" file="expected_outcome4.vcf" lines_diff="4"/>
        </test>
        <test expect_num_outputs="1"> <!-- test 5 - Advanced - mosaic -->
            <param name="input" value="reads_region.bam"/>
            <section name="advanced_options">
                <param name="mosaic" value="--mosaic"/>
            </section>
            <output name="output" file="expected_outcome5.vcf" lines_diff="4"/>
        </test>
        <test expect_num_outputs="1"> <!-- test 6 - reference -->
            <param name="input" value="reads_region.bam"/>
            <conditional name="reference_genome">
                <param name="genome_type_select" value="history"/>
                <!-- the datatype is set with ftype on the dataset param itself -->
                <param name="genome" value="humsamp.fa" ftype="fasta"/>
            </conditional>
            <output name="output" file="expected_outcome6.vcf" lines_diff="4"/>
        </test>
    </tests>
    <help>
        <![CDATA[
########
Sniffles
########

What is Sniffles?
*****************

Sniffles is an SV caller for long reads. Sniffles2 accurately detects SVs at germline, somatic and population level in PacBio and Oxford Nanopore read data.

SVs are larger events on the genome (e.g. deletions, duplications, insertions, inversions and translocations).
Sniffles can detect all of these types and more, such as nested SVs (e.g. an inversion flanked by deletions or an inverted duplication).

----

Inputs
******

Known to work with Minimap2 BAM files as input.
An optional reference fasta with matching contig names allows the sequence of deletion (DEL) SVs to be reported.

----

Parameters
**********


General
-------


+----------------------------+-------------------------------------------------------------------------+
|  Parameter                 |   Description                                                           |
+============================+=========================================================================+
|  Minimum Support           |   Minimum number of reads supporting a SV to be reported. Default:auto  |
+----------------------------+-------------------------------------------------------------------------+
|  Maximum Number of Splits  |   Maximum number of split segments per kb a read is aligned at before   |
|                            |   it is ignored. Default: 0.1                                           |
+----------------------------+-------------------------------------------------------------------------+
|  Minimum SV Length         |   Minimum length of SV to be reported. Default: 50bp                    |
+----------------------------+-------------------------------------------------------------------------+
|  Minimum Mapping Quality   |   Minimum mapping quality of alignment to be taken into account.        |
|                            |   Default: 20                                                           |
+----------------------------+-------------------------------------------------------------------------+
|  Minimum alignment length  |   Reads with less length aligned will be ignored. Default 100           |
+----------------------------+-------------------------------------------------------------------------+




Clustering Options
------------------


+---------------------+------------------------------------------------------------------------+
|  Parameter          |  Description                                                           |
+=====================+========================================================================+
|  Cluster bin size   |  Initial cluster bin size. Default 100                                 |
+---------------------+------------------------------------------------------------------------+
|  Cluster Multiplier |  Multiplier for SV start position standard deviation criterion in      |
|                     |  cluster merging [2.5]                                                 |
+---------------------+------------------------------------------------------------------------+




Advanced Options
----------------


+-------------+--------------------------------------------------------------------------------+
|  Parameter  |   Description                                                                  |
+=============+================================================================================+
|  Mosaic     |   Set Sniffles run mode to detect rare, somatic and mosaic SVs (default: False)|
+-------------+--------------------------------------------------------------------------------+


----

VCF information fields from the VCF header
******************************************

+------------------+-----------+-----------------------------------------------------------------------------------------------+
|   Field          |   Type    |                     Description                                                               |
+==================+===========+===============================================================================================+
|  PRECISE         |   Flag    |                Structural variation with precise breakpoints                                  |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  IMPRECISE       |   Flag    |                Structural variation with imprecise breakpoints                                |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  MOSAIC          |   Flag    |                Structural variation classified as putative mosaic                             |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  SVLEN           |   Integer |                Length of structural variation                                                 |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  SVTYPE          |    String |                Type of structural variation                                                   |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  CHR2            |    String |                Mate chromosome for BND SVs                                                    |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  SUPPORT         |    Integer|                Number of reads supporting the structural variation                            |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  SUPPORT_INLINE  |    Integer|                Number of reads supporting an INS/DEL SV (non-split events only)               |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  SUPPORT_LONG    |    Integer|                Number of soft-clipped reads putatively supporting the long insertion SV       |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  END             |    Integer|                End position of structural variation                                           |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  STDEV_POS       |    Float  |                Standard deviation of structural variation start position                      |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  STDEV_LEN       |    Float  |                Standard deviation of structural variation length                              |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  COVERAGE        |    Float  |                Coverage near upstream start, center, end, downstream of structural variation  |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  STRAND          |    String |                Strands of supporting reads for structural variant                             |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  AC              |    Integer|                Allele count summed up over all samples                                        |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  SUPP_VEC        |    String |                List of read support for all samples                                           |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  CONSENSUS_SUP   |    Integer|                Number of reads that support the generated insertion (INS) consensus sequence  |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  RNAMES          |    String |                Names of supporting reads (if enabled with --output-rnames)                    |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  AF              |    Float  |                Allele Frequency                                                               |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  NM              |    Float  |                Mean number of query alignment length adjusted mismatches of supporting reads  |
+------------------+-----------+-----------------------------------------------------------------------------------------------+
|  PHASE           |    String |                Phasing information derived from supporting reads                              |
+------------------+-----------+-----------------------------------------------------------------------------------------------+


----


Source
******

https://github.com/fritzsedlazeck/Sniffles

    ]]>
    </help>
    <!-- DOI of the publication to cite when this tool is used -->
    <citations>
        <citation type="doi">10.1038/s41587-023-02024-y</citation>
    </citations>
</tool>