comparison sniffles.xml @ 4:43fffeed243f draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc commit 03fbb13717809f9198ab113192d241599705ef7b
author iuc
date Sun, 29 Sep 2024 10:23:36 +0000
parents 09f5c6f3088a
children
comparison
equal deleted inserted replaced
3:09f5c6f3088a 4:43fffeed243f
1 <tool id="sniffles" name="sniffles" version="@TOOL_VERSION@+galaxy0" profile="23.0"> 1 <tool id="sniffles" name="sniffles" version="@TOOL_VERSION@+galaxy1" profile="23.0">
2 <description>Structural variation caller using third generation sequencing</description> 2 <description>Structural variation caller using third generation sequencing</description>
3 <macros> 3 <macros>
4 <token name="@TOOL_VERSION@">2.4</token> 4 <token name="@TOOL_VERSION@">2.4</token>
5 </macros> 5 </macros>
6 <xrefs> 6 <xrefs>
14 </version_command> 14 </version_command>
15 <command detect_errors="exit_code"> 15 <command detect_errors="exit_code">
16 <![CDATA[ 16 <![CDATA[
17 ln -f -s '${input}' input.bam && 17 ln -f -s '${input}' input.bam &&
18 ln -f -s '${input.metadata.bam_index}' input.bam.bai && 18 ln -f -s '${input.metadata.bam_index}' input.bam.bai &&
19 #if $reference_genome.genome_type_select != "None": 19 #if str($reference_genome.genome_type_select) != "None":
20 #if $reference_genome.genome_type_select == "indexed": 20 #if str($reference_genome.genome_type_select) == "indexed":
21 ln -f -s '${reference_genome.genome.fields.path}' 'reference.fa' && 21 ln -f -s '${reference_genome.genome.fields.path}' 'reference.fa' &&
22 #else: 22 #else:
23 ln -f -s '${reference_genome.genome}' 'reference.fa' && 23 ln -f -s '${reference_genome.genome}' 'reference.fa' &&
24 #end if 24 #end if
25 #end if 25 #end if
27 -t \${GALAXY_SLOTS:-2} 27 -t \${GALAXY_SLOTS:-2}
28 -i 'input.bam' 28 -i 'input.bam'
29 -v '$output' 29 -v '$output'
30 ## must set allow-overwrite since the new output vcf file exists 30 ## must set allow-overwrite since the new output vcf file exists
31 --allow-overwrite 31 --allow-overwrite
32 #if $reference_genome.genome_type_select != "None": 32 #if str($reference_genome.genome_type_select) != "None":
33 --reference 'reference.fa' 33 --reference 'reference.fa'
34 #end if 34 #end if
35 ## general_options 35 ## general_options
36 --minsupport '$general_options.minsupport' 36 --minsupport '$general_options.minsupport'
37 --max-splits-kb '$general_options.maxsplitskb' 37 --max-splits-kb '$general_options.maxsplitskb'
38 --minsvlen '$general_options.minsvlen' 38 --minsvlen '$general_options.minsvlen'
39 --mapq '$general_options.mapq' 39 --mapq '$general_options.mapq'
40 --min-alignment-length '$general_options.minalignmentlength' 40 --min-alignment-length '$general_options.minalignmentlength'
41 #if str($detectlargeins) == "0":
42 --detect-large-ins '0'
43 #end if
41 ## clustering_options 44 ## clustering_options
42 --cluster-binsize '$clustering_options.clusterbinsize' 45 --cluster-binsize '$clustering_options.clusterbinsize'
43 --cluster-r '$clustering_options.clusterr' 46 --cluster-r '$clustering_options.clusterr'
44 ## advanced_options 47 ## advanced_options
45 $advanced_options.mosaic 48 $advanced_options.mosaic
53 <option value="None" selected="True">No reference fasta - do not report DEL SV sequence</option> 56 <option value="None" selected="True">No reference fasta - do not report DEL SV sequence</option>
54 <option value="indexed">Use a Galaxy server built-in genome</option> 57 <option value="indexed">Use a Galaxy server built-in genome</option>
55 <option value="history">Use a genome fasta file from the current history</option> 58 <option value="history">Use a genome fasta file from the current history</option>
56 </param> 59 </param>
57 <when value="None"> 60 <when value="None">
58 <param name="genome" type="text" value="None"/> 61 <param name="genome" type="hidden" value="None"/>
59 </when> 62 </when>
60 <when value="indexed"> 63 <when value="indexed">
61 <param name="genome" type="select" optional="false" label="Select a built in reference genome or custom genome" 64 <param name="genome" type="select" optional="false" label="Select a built in reference genome or custom genome"
62 help="If not listed, add a custom genome or use a reference genome from the history"> 65 help="If not listed, add a custom genome or use a reference genome from the history">
63 <options from_data_table="all_fasta"> 66 <options from_data_table="all_fasta">
69 <param name="genome" type="data" format="fasta" optional="false" label="Select the reference genome fasta from the current history"/> 72 <param name="genome" type="data" format="fasta" optional="false" label="Select the reference genome fasta from the current history"/>
70 </when> 73 </when>
71 </conditional> 74 </conditional>
72 <section name="general_options" title="Set general options" expanded="False"> 75 <section name="general_options" title="Set general options" expanded="False">
73 <param argument="--minsupport" type="text" value="auto" label="Minimum Support" help="Minimum number of reads that support a SV. [auto]. Smaller support values -> more SV reported"/> 76 <param argument="--minsupport" type="text" value="auto" label="Minimum Support" help="Minimum number of reads that support a SV. [auto]. Smaller support values -> more SV reported"/>
74 <param name="maxsplitskb" type="float" value="0.1" min="0" label="Maximum Number of Splits per KB" help="Additional number of splits per kilobase read sequence allowed before reads are ignored [0.1]" /> 77 <param argument="--maxsplitskb" type="float" value="0.1" min="0" label="Maximum Number of Splits per KB" help="Additional number of splits per kilobase read sequence allowed before reads are ignored [0.1]" />
75 <param name="minsvlen" type="integer" value="50" min="2" label="Minimum Length" help="Minimum length of SV to be reported. [50]"/> 78 <param argument="--minsvlen" type="integer" value="50" min="2" label="Minimum Length" help="Minimum length of SV to be reported. [50]"/>
76 <param name="mapq" type="integer" value="20" min="0" label="Minimum Mapping Quality" help="Minimum Mapping Quality to consider. [20]"/> 79 <param argument="--mapq" type="integer" value="20" min="0" label="Minimum Mapping Quality" help="Minimum Mapping Quality to consider. [20]"/>
77 <param name="minalignmentlength" type="integer" value="100" min="0" label="Minimum alignment length" help="Reads with alignments shorter than this length (in bp) will be ignored"/> 80 <param argument="--minalignmentlength" type="integer" value="100" min="0" label="Minimum alignment length" help="Reads with alignments shorter than this length (in bp) will be ignored"/>
81 <param name="detectlargeins" type="boolean" truevalue="1" falsevalue="0" display="radio" checked="true" label="Detect very large insertions spanning multiple reads"
82 help="This sometimes shows enormous features."/>
78 </section> 83 </section>
79 <section name="clustering_options" title="Clustering/phasing and genotyping options" expanded="False"> 84 <section name="clustering_options" title="Clustering/phasing and genotyping options" expanded="False">
80 <param argument="--clusterbinsize" value="100" type="integer" min="0" label="Cluster bin size" help="Initial screening bin size [100]"/> 85 <param argument="--clusterbinsize" value="100" type="integer" min="0" label="Cluster bin size" help="Initial screening bin size [100]"/>
81 <param argument="--clusterr" type="float" value="2.5" min="0.0" label="Cluster Multiplier" help="Multiplier for SV start position standard deviation criterion in cluster merging [2.5]"/> 86 <param argument="--clusterr" type="float" value="2.5" min="0.0" label="Cluster Multiplier" help="Multiplier for SV start position standard deviation criterion in cluster merging [2.5]"/>
82 </section> 87 </section>
83 <section name="advanced_options" title="Advanced options" expanded="False"> 88 <section name="advanced_options" title="Advanced options" expanded="False">
84 <param name="mosaic" type="boolean" value="False" truevalue="--mosaic" falsevalue="" label="Mosaic mode" help="Set Sniffles run mode to detect rare, somatic and mosaic SVs (default: False)" /> 89 <param argument="--mosaic" type="boolean" display="radio" checked="false" truevalue="--mosaic" falsevalue="" label="Mosaic mode" help="Set Sniffles run mode to detect rare, somatic and mosaic SVs (default: False)" />
85 </section> 90 </section>
86 </inputs> 91 </inputs>
87 <outputs> 92 <outputs>
88 <data name="output" format="vcf" label="${tool.name} on ${on_string}"/> 93 <data name="output" format="vcf" label="${tool.name} on ${on_string}"/>
89 </outputs> 94 </outputs>
90 <tests> 95 <tests>
91 <test> <!-- test 1 - standard run --> 96 <test> <!-- test 1 - standard run -->
92 <param name="input" value="reads_region.bam"/> 97 <param name="input" value="reads_region.bam"/>
98 <param name="detectlargeins" value="0"/>
93 <output name="output" file="expected_output.vcf" lines_diff="4"/> 99 <output name="output" file="expected_output.vcf" lines_diff="4"/>
94 </test> 100 </test>
95 <test> <!-- test 2 - filter on mapq --> 101 <test> <!-- test 2 - filter on mapq -->
96 <param name="input" value="reads_region.bam"/> 102 <param name="input" value="reads_region.bam"/>
97 <param name="mapq" value="0"/> 103 <param name="mapq" value="0"/>
128 Sniffles 134 Sniffles
129 ######## 135 ########
130 136
131 What is Sniffles? 137 What is Sniffles?
132 ***************** 138 *****************
139
133 Sniffles is an SV caller for long reads. Sniffles2 accurately detects SVs at the germline, somatic and population level for PacBio and Oxford Nanopore read data. 140 Sniffles is an SV caller for long reads. Sniffles2 accurately detects SVs at the germline, somatic and population level for PacBio and Oxford Nanopore read data.
134 141
135 SVs are larger events on the genome (e.g. deletions, duplications, insertions, inversions and translocations). 142 SVs are larger events on the genome (e.g. deletions, duplications, insertions, inversions and translocations).
136 Sniffles can detect all of these types and more, such as nested SVs (e.g. an inversion flanked by deletions or an inverted duplication). 143 Sniffles can detect all of these types and more, such as nested SVs (e.g. an inversion flanked by deletions or an inverted duplication).
137 144
145 ----
146
138 Inputs 147 Inputs
139 ****** 148 ******
140 149
141 Known to work with Minimap2 bam as input 150 Known to work with Minimap2 bam as input
151 An optional reference fasta with matching contig names allows the sequence of deleted regions (DEL SVs) to be reported.
152
153 ----
142 154
143 Parameters 155 Parameters
144 ********** 156 **********
145 157
158
146 General 159 General
147 ------- 160 -------
148 161
149 162
150 +---------------------------+-----------------------------------------------------------------------+ 163 +----------------------------+-------------------------------------------------------------------------+
151 | Parameter | Description | 164 | Parameter | Description |
152 +===========================+=======================================================================+ 165 +============================+=========================================================================+
153 | Minimum Support | Minimum number of reads supporting a SV to be reported. Default:auto | 166 | Minimum Support | Minimum number of reads supporting a SV to be reported. Default:auto |
154 +---------------------------+-----------------------------------------------------------------------+ 167 +----------------------------+-------------------------------------------------------------------------+
155 | Maximum Number of Splits | Maximum number of split segments per kb a read is aligned at before | 168 | Maximum Number of Splits | Maximum number of split segments per kb a read is aligned at before |
156 | | it is ignored. Default: 0.1 | 169 | | it is ignored. Default: 0.1 |
157 +---------------------------+-----------------------------------------------------------------------+ 170 +----------------------------+-------------------------------------------------------------------------+
158 | Minimum SV Length | Minimum length of SV to be reported. Default: 50bp | 171 | Minimum SV Length | Minimum length of SV to be reported. Default: 50bp |
159 +---------------------------+-----------------------------------------------------------------------+ 172 +----------------------------+-------------------------------------------------------------------------+
160 | Minimum Mapping Quality | Minimum mapping quality of alignment to be taken into account. | 173 | Minimum Mapping Quality | Minimum mapping quality of alignment to be taken into account. |
161 | | Default: 20 | 174 | | Default: 20 |
162 +---------------------------+-----------------------------------------------------------------------+ 175 +----------------------------+-------------------------------------------------------------------------+
163 | Minimum alignment length | Reads with less length aligned will be ignored. Default 100 | 176 | Minimum alignment length | Reads with less length aligned will be ignored. Default 100 |
164 +---------------------------+-----------------------------------------------------------------------+ 177 +----------------------------+-------------------------------------------------------------------------+
178
179
165 180
166 181
167 Clustering Options 182 Clustering Options
168 ------------------ 183 ------------------
169 184
170 185
171 +----------------------------------------+-----------------------------------------------------------------------+ 186 +---------------------+------------------------------------------------------------------------+
172 | Parameter | Description | 187 | Parameter | Description |
173 +========================================+=======================================================================+ 188 +=====================+========================================================================+
174 | Cluster bin size | Initial cluster bin size. Default 100 | 189 | Cluster bin size | Initial cluster bin size. Default 100 |
175 +----------------------------------------+-----------------------------------------------------------------------+ 190 +---------------------+------------------------------------------------------------------------+
176 | Cluster Multiplier | Multiplier for SV start position standard deviation criterion in | 191 | Cluster Multiplier | Multiplier for SV start position standard deviation criterion in |
177 | | cluster merging [2.5] | 192 | | cluster merging [2.5] |
178 +----------------------------------------+-----------------------------------------------------------------------+ 193 +---------------------+------------------------------------------------------------------------+
194
195
179 196
180 197
181 Advanced Options 198 Advanced Options
182 ---------------- 199 ----------------
183 200
184 201
185 +----------------------------------------+------------------------------------------------------------------------------+ 202 +-------------+--------------------------------------------------------------------------------+
186 | Parameter | Description | 203 | Parameter | Description |
187 +========================================+==============================================================================+ 204 +=============+================================================================================+
188 | Mosaic | Set Sniffles run mode to detect rare, somatic and mosaic SVs (default: False)| 205 | Mosaic | Set Sniffles run mode to detect rare, somatic and mosaic SVs (default: False)|
189 +----------------------------------------+------------------------------------------------------------------------------+ 206 +-------------+--------------------------------------------------------------------------------+
190 207
191 208
192 Output 209 ----
210
211 VCF information fields from the VCF header
212 ******************************************
213
214 +------------------+-----------+-----------------------------------------------------------------------------------------------+
215 | Field | Type | Description |
216 +==================+===========+===============================================================================================+
217 | PRECISE | Flag | Structural variation with precise breakpoints |
218 +------------------+-----------+-----------------------------------------------------------------------------------------------+
219 | IMPRECISE | Flag | Structural variation with imprecise breakpoints |
220 +------------------+-----------+-----------------------------------------------------------------------------------------------+
221 | MOSAIC | Flag | Structural variation classified as putative mosaic |
222 +------------------+-----------+-----------------------------------------------------------------------------------------------+
223 | SVLEN | Integer | Length of structural variation |
224 +------------------+-----------+-----------------------------------------------------------------------------------------------+
225 | SVTYPE | String | Type of structural variation |
226 +------------------+-----------+-----------------------------------------------------------------------------------------------+
227 | CHR2 | String | Mate chromosome for BND SVs |
228 +------------------+-----------+-----------------------------------------------------------------------------------------------+
229 | SUPPORT | Integer| Number of reads supporting the structural variation |
230 +------------------+-----------+-----------------------------------------------------------------------------------------------+
231 | SUPPORT_INLINE | Integer| Number of reads supporting an INS/DEL SV (non-split events only) |
232 +------------------+-----------+-----------------------------------------------------------------------------------------------+
233 | SUPPORT_LONG | Integer| Number of soft-clipped reads putatively supporting the long insertion SV |
234 +------------------+-----------+-----------------------------------------------------------------------------------------------+
235 | END | Integer| End position of structural variation |
236 +------------------+-----------+-----------------------------------------------------------------------------------------------+
237 | STDEV_POS | Float | Standard deviation of structural variation start position |
238 +------------------+-----------+-----------------------------------------------------------------------------------------------+
239 | STDEV_LEN | Float | Standard deviation of structural variation length |
240 +------------------+-----------+-----------------------------------------------------------------------------------------------+
241 | COVERAGE | Float | Coverage near upstream start, center, end, downstream of structural variation |
242 +------------------+-----------+-----------------------------------------------------------------------------------------------+
243 | STRAND | String | Strands of supporting reads for structural variant |
244 +------------------+-----------+-----------------------------------------------------------------------------------------------+
245 | AC | Integer| Allele count summed up over all samples |
246 +------------------+-----------+-----------------------------------------------------------------------------------------------+
247 | SUPP_VEC | String | List of read support for all samples |
248 +------------------+-----------+-----------------------------------------------------------------------------------------------+
249 | CONSENSUS_SUP | Integer| Number of reads that support the generated insertion (INS) consensus sequence |
250 +------------------+-----------+-----------------------------------------------------------------------------------------------+
251 | RNAMES | String | Names of supporting reads (if enabled with --output-rnames) |
252 +------------------+-----------+-----------------------------------------------------------------------------------------------+
253 | AF | Float | Allele Frequency |
254 +------------------+-----------+-----------------------------------------------------------------------------------------------+
255 | NM | Float | Mean number of query alignment length adjusted mismatches of supporting reads |
256 +------------------+-----------+-----------------------------------------------------------------------------------------------+
257 | PHASE | String | Phasing information derived from supporting reads |
258 +------------------+-----------+-----------------------------------------------------------------------------------------------+
259
260
261 ----
262
263
264 Source
193 ****** 265 ******
194 266
195 VCF Info field description 267 https://github.com/fritzsedlazeck/Sniffles
196 268
197 Sniffles report multiple information in the Info field. The entries are delimited by:
198
199 +-------------------+------------------------------------------------------------------------------------------------------+
200 | IMPRECISE/PRECISE | Indicates the confidence of the exact breakpoint positions (bp). |
201 +-------------------+------------------------------------------------------------------------------------------------------+
202 | CHR2= | The chromosome of the second breakpoint of the SV reported. |
203 +-------------------+------------------------------------------------------------------------------------------------------+
204 | END= | The position (bp) of the second breakpoint of the SV reported. |
205 +-------------------+------------------------------------------------------------------------------------------------------+
206 | ZMW= | For PacBio based reads, shows the number of ZMW that support the SV. |
207 +-------------------+------------------------------------------------------------------------------------------------------+
208 | SVTYPE= | The type of the SV. (see Alt field above) |
209 +-------------------+------------------------------------------------------------------------------------------------------+
210 | SUPTYPE= | Indicates what evidence supports the SVs (SR: Split Reads, AL: Alignment, NR: Noisy Region). |
211 +-------------------+------------------------------------------------------------------------------------------------------+
212 | STD_quant_start= | The standard deviation of the start breakpoints. |
213 +-------------------+------------------------------------------------------------------------------------------------------+
214 | STD_quant_stop= | The standard deviation of the stop breakpoints. |
215 +-------------------+------------------------------------------------------------------------------------------------------+
216 | RNAMES= | A comma separated list of read names that support the SV event. Controlled by -n Parameter. |
217 +-------------------+------------------------------------------------------------------------------------------------------+
218 | SVLEN= | Indicates the length of SVs. |
219 +-------------------+------------------------------------------------------------------------------------------------------+
220 | STRANDS= | Strand information at both breakpoints. |
221 +-------------------+------------------------------------------------------------------------------------------------------+
222 | SEQ= | If reportable shows the sequence of the indels. |
223 +-------------------+------------------------------------------------------------------------------------------------------+
224 | RE= | Number of reads supporting the variance. |
225 +-------------------+------------------------------------------------------------------------------------------------------+
226 | AF= | Allele frequency (only if run with –genotype) |
227 +-------------------+------------------------------------------------------------------------------------------------------+
228
229 Source: https://github.com/fritzsedlazeck/Sniffles
230 ]]> 269 ]]>
231 </help> 270 </help>
232 <citations> 271 <citations>
233 <citation type="doi">10.1038/s41587-023-02024-y</citation> 272 <citation type="doi">10.1038/s41587-023-02024-y</citation>
234 </citations> 273 </citations>