comparison sniffles.xml @ 0:93c4b04a0769 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc commit f5977355970ef4994957147d2d8a96fe6605e2b4"
author iuc
date Mon, 14 Sep 2020 07:39:07 +0000
parents
children 3f6f028f418f
comparison
equal deleted inserted replaced
-1:000000000000 0:93c4b04a0769
1 <tool id="sniffles" name="sniffles" version="@TOOL_VERSION@+galaxy0">
<!-- Short description of what the wrapped program does. -->
<description>Structural variation caller using third generation sequencing</description>

<!-- @TOOL_VERSION@ is defined once here and reused by the tool version
     attribute and by the package requirement below. -->
<macros>
    <token name="@TOOL_VERSION@">1.0.12</token>
</macros>

<!-- The sniffles package is pinned to the same version as the token above. -->
<requirements>
    <requirement version="@TOOL_VERSION@" type="package">sniffles</requirement>
</requirements>

<!-- Command used to record the version of the underlying binary. -->
<version_command><![CDATA[sniffles --version]]></version_command>
<!--
    Command template (Cheetah + shell).

    1. Symlinks the input BAM and its index into the working directory under
       fixed names (input.bam / input.bam.bai).
    2. Runs sniffles on input.bam, writing the VCF to $output and using
       GALAXY_SLOTS threads (falling back to 2).

    Every numeric parameter is declared optional="true" in <inputs>, so each
    one is only emitted when it actually holds a value. The check is done on
    the string form (str($param) != '') rather than on the truthiness of the
    parameter, so that a legitimate value of 0 is still passed through while
    an emptied field does not leave a flag without its argument.
    Boolean parameters expand to either their flag or an empty string and
    therefore need no guard.
-->
<command detect_errors="exit_code"><![CDATA[
    ln -f -s '${input}' input.bam &&
    ln -f -s '${input.metadata.bam_index}' input.bam.bai &&

    sniffles
        -t \${GALAXY_SLOTS:-2}
        -m 'input.bam'
        -v '$output'

        ## general_options
        #if str($general_options.min_support) != '':
            --min_support $general_options.min_support
        #end if
        #if str($general_options.max_num_splits) != '':
            --max_num_splits $general_options.max_num_splits
        #end if
        #if str($general_options.max_distance) != '':
            --max_distance $general_options.max_distance
        #end if
        #if str($general_options.min_length) != '':
            --min_length $general_options.min_length
        #end if
        #if str($general_options.minmapping_qual) != '':
            --minmapping_qual $general_options.minmapping_qual
        #end if
        #if str($general_options.num_reads_report) != '':
            --num_reads_report $general_options.num_reads_report
        #end if
        #if str($general_options.min_seq_size) != '':
            --min_seq_size $general_options.min_seq_size
        #end if
        #if str($general_options.min_zmw) != '':
            --min_zmw $general_options.min_zmw
        #end if
        $general_options.cs_string

        ## clustering_options
        $clustering_options.cluster
        #if str($clustering_options.cluster_support) != '':
            --cluster_support $clustering_options.cluster_support
        #end if
        #if str($clustering_options.allelefreq) != '':
            --allelefreq $clustering_options.allelefreq
        #end if
        #if str($clustering_options.min_homo_af) != '':
            --min_homo_af $clustering_options.min_homo_af
        #end if
        #if str($clustering_options.min_het_af) != '':
            --min_het_af $clustering_options.min_het_af
        #end if

        ## advanced_options
        $advanced_options.report_BND
        $advanced_options.not_report_seq
        $advanced_options.ignore_sd
        $advanced_options.ccs_reads

        ## parameter_estimation_options
        $parameter_estimation_options.skip_parameter_estimation
        #if str($parameter_estimation_options.del_ratio) != '':
            --del_ratio $parameter_estimation_options.del_ratio
        #end if
        #if str($parameter_estimation_options.ins_ratio) != '':
            --ins_ratio $parameter_estimation_options.ins_ratio
        #end if
        #if str($parameter_estimation_options.max_diff_per_window) != '':
            --max_diff_per_window $parameter_estimation_options.max_diff_per_window
        #end if
        #if str($parameter_estimation_options.max_dist_aln_events) != '':
            --max_dist_aln_events $parameter_estimation_options.max_dist_aln_events
        #end if
]]></command>
<!--
    Tool inputs: one BAM dataset plus four collapsed option sections.
    Section and parameter names are referenced from the command template as
    $<section>.<param>, so they must stay in sync with it.
    The bracketed value at the end of each help text repeats the default.
-->
<inputs>
    <param type="data" name="input" format="bam" label="Input BAM file"/>

    <section name="general_options" title="Set general options" expanded="False">
        <param argument="--min_support" type="integer" value="10" optional="true" min="1" label="Minimum Support" help="Minimum number of reads that support a SV. [10]"/>
        <param argument="--max_num_splits" type="integer" value="7" optional="true" min="0" label="Maximum Number of Splits" help="Maximum number of splits per read to be still taken into account. [7]"/>
        <param argument="--max_distance" type="integer" value="1000" optional="true" min="10" label="Maximum Distance" help="Maximum distance to group SV together. [1000]"/>
        <param argument="--min_length" type="integer" value="30" optional="true" min="2" label="Minimum Length" help="Minimum length of SV to be reported. [30]"/>
        <param argument="--minmapping_qual" type="integer" value="20" optional="true" min="0" label="Minimum Mapping Quality" help="Minimum Mapping Quality. [20]"/>
        <param argument="--num_reads_report" type="integer" value="0" optional="true" min="-1" label="Number of reads to report" help="Report up to N reads that support the SV in the vcf file. -1: report all. [0]"/>
        <!-- Left empty by default: the flag is then not passed at all. -->
        <param argument="--min_seq_size" type="integer" value="" optional="true" label="Minimum Seq Size" help="Discard read if none of its segments is larger than this. [2000]"/>
        <param argument="--min_zmw" type="integer" value="0" optional="true" min="0" label="Minimum ZMW" help="Discard SV that are not supported by at least x zmws. This applies only for PacBio recognizable reads. [0]"/>
        <param argument="--cs_string" type="boolean" truevalue="--cs_string" falsevalue="" optional="true" label="Enable CS String" help="Enables the scan of CS string instead of Cigar and MD. [false]"/>
    </section>

    <section name="clustering_options" title="Clustering/phasing and genotyping options" expanded="False">
        <param argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" optional="true" label="Cluster" help="Enables Sniffles to phase SVs that occur on the same reads [false]"/>
        <param argument="--cluster_support" type="integer" value="1" optional="true" min="1" label="Cluster Support" help="Minimum number of reads supporting clustering of SV. [1]"/>
        <param argument="--allelefreq" type="float" value="0" optional="true" min="0" label="Allele Frequency Threshold" help="Filters the SV calls based on the allele frequency. [0]"/>
        <param argument="--min_homo_af" type="float" value="0.8" optional="true" min="0" max="1" label="Minimum Homogenous Allele Frequency" help="Minimum homogeneous threshold on allele frequency (0-1). [0.8]"/>
        <param argument="--min_het_af" type="float" value="0.3" optional="true" min="0" max="1" label="Minimum Heterogeneous Allele Frequency" help="Minimum heterogeneous threshold on allele frequency (0-1). [0.3]"/>
    </section>

    <section name="advanced_options" title="Advanced options" expanded="False">
        <param argument="--report_BND" type="boolean" value="True" truevalue="--report_BND" falsevalue="" optional="true" label="Report BND" help="Report BND instead of Tra in vcf output. [true]"/>
        <param argument="--not_report_seq" type="boolean" value="False" truevalue="--not_report_seq" falsevalue="" optional="true" label="Don't report seq" help="Don't report sequences for indels in vcf output. (Beta version!) [false]"/>
        <param argument="--ignore_sd" type="boolean" value="False" truevalue="--ignore_sd" falsevalue="" optional="true" label="Ignore sd" help="Ignores the sd based filtering. [false]"/>
        <param argument="--ccs_reads" type="boolean" value="False" truevalue="--ccs_reads" falsevalue="" optional="true" label="CCS Reads" help="Preset CCS Pacbio setting. (Beta) [false]"/>
    </section>

    <section name="parameter_estimation_options" title="Parameter Estimation Options" expanded="False">
        <param argument="--skip_parameter_estimation" type="boolean" value="False" truevalue="--skip_parameter_estimation" falsevalue="" optional="true" label="Skip Parameter Estimation" help="Enables the scan if only very few reads are present. [false]"/>
        <param argument="--del_ratio" type="float" value="0.0458369" optional="true" min="0" max="1" label="Estimated Deletion Ratio" help="Estimated ratio of deletions per read (0-1). [0.0458369]"/>
        <param argument="--ins_ratio" type="float" value="0.049379" optional="true" min="0" max="1" label="Estimated Insertion Ratio" help="Estimated ratio of insertions per read (0-1). [0.049379]"/>
        <param argument="--max_diff_per_window" type="integer" value="50" optional="true" min="0" label="Maximum Differences Per Window" help="Maximum differences per 100bp. [50]"/>
        <param argument="--max_dist_aln_events" type="integer" value="4" optional="true" min="0" label="Maximum Distance Between Alignment Events" help="Maximum distance between alignment (indel) events. [4]"/>
    </section>
</inputs>
<!-- Single VCF dataset; the command template writes to it through -v '$output'. -->
<outputs>
    <data format="vcf" name="output" label="${tool.name} on ${on_string}"/>
</outputs>
<!--
    Functional tests. All of them run on the same small BAM and compare the
    produced VCF with an expected file, tolerating two differing lines.

    Parameters that live inside a <section> of <inputs> are addressed through
    a matching <section> element so each test names exactly the input it sets.
    The former "output_format" test parameter was removed: no such input is
    declared by this tool, so it had no effect on the run.
-->
<tests>
    <!-- test 1 - standard run -->
    <test>
        <param name="input" value="reads_region.bam"/>
        <output name="output" file="expected_output.vcf" lines_diff="2"/>
    </test>
    <!-- test 2 - add reads into report -->
    <test>
        <param name="input" value="reads_region.bam"/>
        <section name="general_options">
            <param name="num_reads_report" value="-1"/>
        </section>
        <output name="output" file="expected_output2.vcf" lines_diff="2"/>
    </test>
    <!-- test 3 - use cs_string -->
    <test>
        <param name="input" value="reads_region.bam"/>
        <section name="general_options">
            <param name="cs_string" value="true"/>
        </section>
        <output name="output" file="expected_outcome3.vcf" lines_diff="2"/>
    </test>
    <!-- test 4 - clustering -->
    <test>
        <param name="input" value="reads_region.bam"/>
        <section name="clustering_options">
            <param name="cluster" value="True"/>
        </section>
        <output name="output" file="expected_outcome4.vcf" lines_diff="2"/>
    </test>
    <!-- test 5 - Advanced - Report BND -->
    <test>
        <param name="input" value="reads_region.bam"/>
        <section name="advanced_options">
            <param name="report_BND" value="True"/>
        </section>
        <output name="output" file="expected_outcome5.vcf" lines_diff="2"/>
    </test>
    <!-- test 6 - Parameter Estimation - skip -->
    <test>
        <param name="input" value="reads_region.bam"/>
        <section name="parameter_estimation_options">
            <param name="skip_parameter_estimation" value="True"/>
        </section>
        <output name="output" file="expected_outcome6.vcf" lines_diff="2"/>
    </test>
</tests>
134 <help>
135 <![CDATA[
136 ########
137 Sniffles
138 ########
139
140 What is Sniffles?
141 *****************
142 Sniffles is an SV caller for long reads. It is mainly designed for PacBio reads, but also works on Oxford Nanopore reads. SVs are larger events on the genome (e.g. deletions, duplications, insertions, inversions and translocations). Sniffles can detect all of these types and more, such as nested SVs (e.g. an inversion flanked by deletions or an inverted duplication). Furthermore, Sniffles incorporates multiple auto-tuning functions that determine data-set-dependent parameters to reduce the overall risk of falsely inferring SVs.
143
144 Quick Start
145 ***********
146
147 Make sure you have a sorted bam file from either ngmlr or bwa. For the latter, make sure you have used the -M parameter for mapping to mark which alignments are primary and which are secondary! Note that you have to adjust the parameters for low coverage cases.
148
149 Parameters
150 **********
151
152 General
153 -------
154
155 +---------------------------+-----------------------------------------------------------------------+
156 | Parameter | Description |
157 +===========================+=======================================================================+
158 | Minimum Support | Minimum number of reads that support a SV to be reported. Default: 10 |
159 +---------------------------+-----------------------------------------------------------------------+
160 | Maximum Number of Splits | Maximum number of split segments a read is aligned at before it is |
161 | | ignored. Default: 7 |
162 +---------------------------+-----------------------------------------------------------------------+
163 | Maximum Distance | Maximum distance to group SV together. Sniffles estimates this |
164 | | parameter during runtime to group together SVs reported by different |
165 | | reads. Default: 1kb |
166 +---------------------------+-----------------------------------------------------------------------+
167 | Minimum Length | Minimum length of SV to be reported. Default: 30bp |
168 +---------------------------+-----------------------------------------------------------------------+
169 | Minimum Mapping Quality | Minimum mapping quality of alignment to be taken into account. |
170 | | Default: 20 |
171 +---------------------------+-----------------------------------------------------------------------+
172 | Number of Reads to Report | Number of read names to be reported that support the SV in the vcf |
173 | | file. Default: 0 |
174 +---------------------------+-----------------------------------------------------------------------+
175 | Minimum Seq Size | Discard read if none of its segment is larger than this. Default: 2kb |
176 +---------------------------+-----------------------------------------------------------------------+
177 | Minimum ZMW | Discard SV that are not supported by at least x zmws. This applies |
178 | | only for PacBio recognizable reads. Default: 0 |
179 +---------------------------+-----------------------------------------------------------------------+
180 | Enable CS String | Enables the scan of CS string instead of Cigar and MD. Default: False|
181 +---------------------------+-----------------------------------------------------------------------+
182
183 |
184
185 Clustering Options
186 ------------------
187
188 +----------------------------------------+-----------------------------------------------------------------------+
189 | Parameter | Description |
190 +========================================+=======================================================================+
191 | Cluster | Performs read based phasing to mark SVs that occur together. |
192 +----------------------------------------+-----------------------------------------------------------------------+
193 | Cluster Support | Minimum number of reads supporting clustering of SV. Default: 1 |
194 +----------------------------------------+-----------------------------------------------------------------------+
195 | Allele Frequency Threshold | Filters the SV calls based on the allele frequency. Default: 0.0 |
196 +----------------------------------------+-----------------------------------------------------------------------+
197 | Minimum Homogenous Allele Frequency | Minimum homogeneous threshold on allele frequency. Default: 0.8 |
198 +----------------------------------------+-----------------------------------------------------------------------+
199 | Minimum Heterogeneous Allele Frequency | Minimum heterogeneous threshold on allele frequency. Default: 0.3 |
200 +----------------------------------------+-----------------------------------------------------------------------+
201
202 |
203
204 Advanced Options
205 ----------------
206
207 +----------------------------------------+------------------------------------------------------------------------+
208 | Parameter | Description |
209 +========================================+========================================================================+
210 | Report BND | Reports the inversions and translocations as BND events. Default: False|
211 +----------------------------------------+------------------------------------------------------------------------+
212 | Don't Report Seq | Don't report sequences for indels in vcf output. (Beta version!) |
213 | | Default: False |
214 +----------------------------------------+------------------------------------------------------------------------+
215 | Ignore sd | Ignores the sd based filtering. Default: False |
216 +----------------------------------------+------------------------------------------------------------------------+
217 | CCS Reads | Preset CCS Pacbio setting. (Beta) Default: False |
218 +----------------------------------------+------------------------------------------------------------------------+
219
220 |
221
222 Parameter Estimation Options
223 ----------------------------
224
225 +----------------------------------------+------------------------------------------------------------------------+
226 | Parameter | Description |
227 +========================================+========================================================================+
228 | Skip Parameter Estimation | Enables the scan if only very few reads are present. Default: False |
229 +----------------------------------------+------------------------------------------------------------------------+
230 | Estimated Deletion Ratio | Estimated ratio of deletions per read. Default: 0.0458369 |
231 +----------------------------------------+------------------------------------------------------------------------+
232 | Estimated Insertion Ratio | Estimated ratio of insertions per read. Default: 0.049379 |
233 +----------------------------------------+------------------------------------------------------------------------+
234 | Maximum Differences Per Window | Maximum differences per 100bp. Default: 50 |
235 +----------------------------------------+------------------------------------------------------------------------+
236 | Maximum Distance Between Alignment | Maximum distance between alignment (indel) events. Default: 4 |
237 | Events | |
238 +----------------------------------------+------------------------------------------------------------------------+
239
240 |
241
242 Output
243 ******
244
245 VCF Info field description
246 |
247 Sniffles reports multiple pieces of information in the Info field. The entries are delimited by ";":
248 |
249 +-------------------+------------------------------------------------------------------------------------------------------+
250 | IMPRECISE/PRECISE | Indicates the confidence of the exact breakpoint positions (bp). |
251 +-------------------+------------------------------------------------------------------------------------------------------+
252 | CHR2= | The chromosome of the second breakpoint of the SV reported. |
253 +-------------------+------------------------------------------------------------------------------------------------------+
254 | END= | The position (bp) of the second breakpoint of the SV reported. |
255 +-------------------+------------------------------------------------------------------------------------------------------+
256 | ZMW= | For PacBio based reads, shows the number of ZMW that support the SV. |
257 +-------------------+------------------------------------------------------------------------------------------------------+
258 | SVTYPE= | The type of the SV. (see Alt field above) |
259 +-------------------+------------------------------------------------------------------------------------------------------+
260 | SUPTYPE= | Indicates what evidence supports the SVs (SR: Split Reads, AL: Alignment, NR: Noisy Region). |
261 +-------------------+------------------------------------------------------------------------------------------------------+
262 | STD_quant_start= | The standard deviation of the start breakpoints. |
263 +-------------------+------------------------------------------------------------------------------------------------------+
264 | STD_quant_stop= | The standard deviation of the stop breakpoints. |
265 +-------------------+------------------------------------------------------------------------------------------------------+
266 | RNAMES= | A comma separated list of read names that support the SV event. Controlled by -n Parameter. |
267 +-------------------+------------------------------------------------------------------------------------------------------+
268 | SVLEN= | Indicates the length of SVs. |
269 +-------------------+------------------------------------------------------------------------------------------------------+
270 | STRANDS= | Strand information at both breakpoints. |
271 +-------------------+------------------------------------------------------------------------------------------------------+
272 | SEQ= | If reportable shows the sequence of the indels. |
273 +-------------------+------------------------------------------------------------------------------------------------------+
274 | RE= | Number of reads supporting the variant. |
275 +-------------------+------------------------------------------------------------------------------------------------------+
276 | AF= | Allele frequency (only if run with --genotype) |
277 +-------------------+------------------------------------------------------------------------------------------------------+
278 |
279 Source: https://github.com/fritzsedlazeck/Sniffles/wiki
280 ]]>
281 </help>
<!-- Publication to cite for this tool, referenced by DOI. -->
<citations>
    <citation type="doi">10.1038/s41592-018-0001-7</citation>
</citations>
285 </tool>