Mercurial > repos > iuc > lofreq_call
comparison lofreq_call.xml @ 0:31216d510164 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/lofreq commit 9efcb813ab17041c7f5aad834dfff45bd7046c60"
author | iuc |
---|---|
date | Tue, 17 Dec 2019 17:27:17 -0500 |
parents | |
children | dfadc322b065 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:31216d510164 |
---|---|
1 <tool id="lofreq_call" name="Call variants" version="@WRAPPER_VERSION@0"> | |
2 <description>with LoFreq</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements" /> | |
7 <command detect_errors="exit_code"><![CDATA[ | |
8 ## prepare reference genome and mapped reads input | |
9 @PREPARE_REF@ | |
10 ln -s '$reads' reads.bam && | |
11 ln -s -f '${reads.metadata.bam_index}' reads.bam.bai && | |
12 | |
13 ## call variants with lofreq | |
14 | |
15 ## make lofreq stick to tool contract by | |
16 ## generating tmp output inside job working dir | |
17 mkdir pp-tmp && | |
18 export TMPDIR=pp-tmp && | |
19 | |
20 lofreq call-parallel --pp-threads \${GALAXY_SLOTS:-1} --verbose | |
21 | |
22 --ref '$reference_fasta_fn' --out variants.vcf $variant_types | |
23 | |
24 #if str($regions.restrict_to_region) == 'regions_from_file': | |
25 --bed '$regions.bed' | |
26 #end if | |
27 | |
28 #if str($call_control.set_call_options) == 'yes': | |
29 --min-cov $call_control.coverage.min_cov | |
30 --max-depth $call_control.coverage.max_depth | |
31 $call_control.pe.use_orphan | |
32 --min-bq $call_control.bc_quals.min_bq | |
33 --min-alt-bq $call_control.bc_quals.min_alt_bq | |
34 --def-alt-bq $call_control.bc_quals.def_alt_bq | |
35 ${call_control.align_quals.alnqual.use_alnqual} | |
36 #if str($call_control.align_quals.alnqual.use_alnqual) != '-A -B': | |
37 ${call_control.align_quals.alnqual.alnqual_choice.alnquals_to_use} | |
38 ${call_control.align_quals.alnqual.alnqual_choice.extended_baq} | |
39 #end if | |
40 --min-mq $call_control.map_quals.min_mq | |
41 --max-mq $call_control.map_quals.use_mq.max_mq | |
42 $call_control.map_quals.use_mq.no_mq | |
43 #if str($call_control.source_qual.use_src_qual.src_qual): | |
44 $call_control.source_qual.use_src_qual.src_qual | |
45 #set $ign_vcfs = ','.join([str($ign_vcf) for $ign_vcf in $call_control.source_qual.use_src_qual.ign_vcf if $ign_vcf]) | |
46 #if $ign_vcfs: | |
47 --ign-vcf "$ign_vcfs" | |
48 #end if | |
49 --def-nm-q $call_control.source_qual.use_src_qual.def_nm_q | |
50 #end if | |
51 --min-jq $call_control.joint_qual.min_jq | |
52 --min-alt-jq $call_control.joint_qual.min_alt_jq | |
53 --def-alt-jq $call_control.joint_qual.def_alt_jq | |
54 #end if | |
55 | |
56 --sig $filter_control.sig | |
57 #set $bonf_factor = $filter_control.bonf or 'dynamic' | |
58 --bonf $bonf_factor | |
59 $filter_control.others | |
60 | |
61 reads.bam 2>&1 | |
62 | |
63 ## in case of errors add the log files produced | |
64 ## by the parallel workers to stderr | |
65 || (tool_exit_code=\$? && cat pp-tmp/lofreq2_call_parallel*/*.log 1>&2 && exit \$tool_exit_code) | |
66 | |
67 ## work around a bug in lofreq call-parallel | |
68 ## https://github.com/CSB5/lofreq/issues/85 | |
69 ## that causes the output format to be vcf.gz with certain filter | |
70 ## combinations. | |
71 #if str($bonf_factor) != 'dynamic': | |
72 #if '--no-default-filter' in str($filter_control.others): | |
73 && ln -s variants.vcf variants.vcf.gz | |
74 && gzip -df variants.vcf.gz | |
75 #end if | |
76 #end if | |
77 ]]></command> | |
78 <inputs> | |
79 <param type="data" name="reads" format="bam" label="Input reads in BAM format" /> | |
80 <expand macro="reference_interface" /> | |
81 <conditional name="regions"> | |
82 <param name="restrict_to_region" type="select" | |
83 label="Call variants across"> | |
84 <option value="genome">Whole reference</option> | |
85 <option value="regions_from_file">Regions specified in BED</option> | |
86 </param> | |
87 <when value="genome" /> | |
88 <when value="regions_from_file"> | |
89 <param argument="--bed" type="data" format="bed" | |
90 label="BED dataset with regions to examine" /> | |
91 </when> | |
92 </conditional> | |
93 <param name="variant_types" type="select" | |
94 label="Types of variants to call" | |
95 help="Note: When including indels in the called variants you should preprocess your input data to include indel alignment qualities"> | |
96 <option value="--call-indels">SNVs and indels</option> | |
97 <option value="" selected="True">Only SNVs</option> | |
98 <option value="--only-indels">Only indels</option> | |
99 </param> | |
100 <conditional name="call_control"> | |
101 <param name="set_call_options" type="select" | |
102 label="Variant calling parameters"> | |
103 <option value="no">Use default settings</option> | |
104 <option value="yes">Configure settings</option> | |
105 </param> | |
106 <when value="no" /> | |
107 <when value="yes"> | |
108 <section name="coverage" title="Coverage" expanded="true"> | |
109 <param name="min_cov" argument="--min-cov" type="integer" value="1" min="1" | |
110 label="Minimal coverage" | |
111 help="Do not attempt variant calling at sites that are not covered by at least this number of reads (default: 1)" /> | |
112 <param name="max_depth" argument="--max-depth" type="integer" value="1000000" min="1" | |
113 label="Coverage cap" | |
114 help="For efficiency, do not consider more than this number of reads at any site (default: 1,000,000)" /> | |
115 </section> | |
116 <section name="pe" title="Paired reads" expanded="true"> | |
117 <param name="use_orphan" argument="--use-orphan" type="boolean" truevalue="--use-orphan" falsevalue="" checked="False" | |
118 label="Use reads from anomalously mapped pairs" | |
119 help="Applies to paired-end reads only. If set to true, reads from pairs that are flagged as non-proper pairs (SAM/BAM FLAG field 2) will be used in variant calling. The default is to ignore such reads." /> | |
120 </section> | |
121 <section name="bc_quals" title="Base-calling quality" expanded="true"> | |
122 <param name="min_bq" argument="--min-bq" type="integer" value="6" | |
123 label="Minimum baseQ" | |
124 help="For variant calling at any given site, do not consider reads for which the base at that site has a base quality less than this value (default: 6)" /> | |
125 <param name="min_alt_bq" argument="--min-alt-bq" type="integer" min="0" value="6" | |
126 label="Minimum baseQ for alternate bases" | |
127 help="For variant calling at any given site, do not consider reads that support a non-reference allele at the site if that base has a base quality less than this value (default: 6). Note: this setting will have no effect if the specified value is less than the general Minimum baseQ above." /> | |
128 <param name="def_alt_bq" argument="--def-alt-bq" type="integer" min="0" value="0" | |
129 label="Overwrite baseQs of alternate bases with this value" | |
130 help="After filtering reads according to the Minimum baseQ settings above, set the base quality of the non-reference bases in the surviving reads to this new value (default: 0 = keep the original base quality)." /> | |
131 </section> | |
132 <section name="align_quals" title="Base alignment quality" expanded="true" | |
133 help="Choose here whether you want to incorporate base and/or indel alignment qualities into lofreq's joint quality model. If you have previously computed and stored (using lofreq alnqual) any of these quality scores into your input dataset, you can tell the tool to reuse them. Alternatively, the tool can calculate the necessary scores on the fly."> | |
134 <conditional name="alnqual"> | |
135 <param name="use_alnqual" type="select" | |
136 label="Consider base/indel alignment qualities during variant calling?"> | |
137 <option value="">Yes, and prefer existing alignment qualities encoded in input</option> | |
138 <option value="--del-baq">Yes, (re)calculate alignment qualities on the fly and use them</option> | |
139 <option value="-A -B">No, don't make use of alignment qualities</option> | |
140 </param> | |
141 <when value=""> | |
142 <expand macro="handle_existing_alnqual" /> | |
143 </when> | |
144 <when value="--del-baq"> | |
145 <expand macro="handle_alnqual" mode="Add and use" /> | |
146 </when> | |
147 <when value="-A -B" /> | |
148 </conditional> | |
149 </section> | |
150 <section name="map_quals" title="Mapping quality" expanded="true"> | |
151 <param name="min_mq" argument="--min-mq" type="integer" value="0" | |
152 label="Minimum mapping quality" | |
153 help="For variant calling at any given site, do not consider reads with a mapping quality (MAPQ) less than this value (default: 0 = do not filter on read mapping quality)." /> | |
154 <conditional name="use_mq"> | |
155 <param name="no_mq" argument="--no-mq" type="select" | |
156 label="Consider mapping quality during variant calling?"> | |
157 <option value="">Yes, incorporate MAPQ into joint quality score</option> | |
158 <option value="--no-mq">No, ignore MAPQ scores during variant calling</option> | |
159 </param> | |
160 <when value=""> | |
161 <param name="max_mq" argument="--max-mq" type="integer" value="255" | |
162 label="Maximum mapping quality" | |
163 help="For the joint quality model at any site, cap the mapping quality of reads at this value (default: 255 = do not cap mapping qualities). Note: The special MAPQ value 255 is used by many tools to indicate undefined mapping quality, and lofreq call will ignore such reads during variant calling. Capping, however, will turn 255 into a regular MAPQ score. Thus, if you need to avoid using such reads, you should filter out MAPQ 255 reads from your input data with other tools before using lofreq call with MAPQ capping."/> | |
164 </when> | |
165 <when value="--no-mq"> | |
166 <param name="max_mq" type="hidden" value="" /> | |
167 </when> | |
168 </conditional> | |
169 </section> | |
170 <section name="source_qual" title="Source quality" expanded="true"> | |
171 <conditional name="use_src_qual"> | |
172 <param argument="--src-qual" name="src_qual" type="select" | |
173 label="Compute source quality and consider it during variant calling"> | |
174 <option value="">No, don't incorporate source quality into joint quality score</option> | |
175 <option value="--src-qual">Yes, compute source quality and merge it into joint quality score</option> | |
176 </param> | |
177 <when value="" /> | |
178 <when value="--src-qual"> | |
179 <param name="def_nm_q" argument="--def-nm-q" type="integer" min="-1" value="-1" | |
180 label="Replace non-match base qualities with this value" | |
181 help="For the calculation of the source quality, replace all base qualities of non-match bases with this value (default: -1 = use original base qualities)." /> | |
182 <param name="ign_vcf" argument="--ign-vcf" type="data" format="vcf" optional="true" multiple="true" | |
183 label="VCF input(s) with variants to ignore for source quality computation" | |
184 help="Mismatches caused by known true variants in your samples should not lower the source quality estimate. If any read in your input has a base that is non-reference, but supports one of the known variants in the specified VCF datasets, that base will not be treated as a mismatch." /> | |
185 </when> | |
186 </conditional> | |
187 </section> | |
188 <section name="joint_qual" title="Joint quality" expanded="true"> | |
189 <param name="min_jq" argument="--min-jq" type="integer" min="0" value="0" | |
190 label="Minimum joinedQ" | |
191 help="At any site, do not use reads for variant calling, if their calculated joint quality at that site is lower than this value (default: 0 = do not filter based on joint quality)" /> | |
192 <param name="min_alt_jq" argument="--min-alt-jq" type="integer" min="0" value="0" | |
193 label="Minimum joinedQ for alternate bases" | |
194 help="At any site, do not use reads for variant calling, if they support a non-reference allele at that site and their calculated joint quality at the site is lower than this value (default: 0 = do not filter based on joint quality). Note: this setting has no effect if the specified value is smaller than the general Minimum joinedQ set above." /> | |
195 <!-- def-alt-jq==-1, though documented, is currently not implemented --> | |
196 <param name="def_alt_jq" argument="--def-alt-jq" type="integer" min="0" value="0" | |
197 label="Overwrite joinedQs of alternate bases with this value" | |
198 help="After filtering according to the Minimum joinedQ settings above, set the joint quality values for all reads surviving filtering and supporting a non-reference allele to this value (default: 0 = use the original calculated joint quality)." /> | |
199 </section> | |
200 </when> | |
201 </conditional> | |
202 <conditional name="filter_control"> | |
203 <param name="filter_type" type="select" | |
204 label="Variant filter parameters"> | |
205 <option value="set_all_off">Strictly no filtering</option> | |
206 <option value="set_no_default">Preset QUAL score-based filtering</option> | |
207 <option value="set_lofreq_standard" selected="true">Preset filtering on QUAL score + coverage + strand bias (lofreq call default)</option> | |
208 <option value="set_custom">Custom filter settings/combinations</option> | |
209 </param> | |
210 <when value="set_all_off"> | |
211 <param name="sig" type="hidden" value="1" /> | |
212 <param name="bonf" type="hidden" value="1" /> | |
213 <param name="others" type="hidden" value="--no-default-filter" /> | |
214 </when> | |
215 <when value="set_no_default"> | |
216 <param name="sig" type="hidden" value="0.01" /> | |
217 <param name="bonf" type="hidden" value="dynamic" /> | |
218 <param name="others" type="hidden" value="--no-default-filter" /> | |
219 </when> | |
220 <when value="set_lofreq_standard"> | |
221 <param name="sig" type="hidden" value="0.01" /> | |
222 <param name="bonf" type="hidden" value="dynamic" /> | |
223 <param name="others" type="hidden" value="" /> | |
224 </when> | |
225 <when value="set_custom"> | |
226 <param name="sig" type="float" value="0.01" min="0" max="1" | |
227 label="Significance threshold for calls"/> | |
228 <param name="bonf" type="integer" value="0" min="0" | |
229 label="Bonferroni correction factor for multiple testing" | |
230 help="Set to zero to determine dynamically from actual number of variant tests performed. Dynamic detection will calculate separate correction factors for SNVs and indels" /> | |
231 <param name="others" type="boolean" truevalue="" falsevalue="--no-default-filter" checked="true" | |
232 label="Apply default coverage and strand-bias filter?" | |
233 help="" /> | |
234 </when> | |
235 </conditional> | |
236 </inputs> | |
237 <outputs> | |
238 <data name="variants" from_work_dir="variants.vcf" format="vcf" /> | |
239 </outputs> | |
240 <tests> | |
241 <test> | |
242 <param name="reads" ftype="bam" value="lofreq-in1.bam" /> | |
243 <param name="ref_selector" value="history" /> | |
244 <param name="ref" ftype="fasta" value="pBR322.fa" /> | |
245 <output name="variants" file="call-out1.vcf" lines_diff="4" /> | |
246 </test> | |
247 <test> | |
248 <param name="reads" ftype="bam" value="lofreq-in1.bam" /> | |
249 <param name="ref_selector" value="history" /> | |
250 <param name="ref" ftype="fasta" value="pBR322.fa" /> | |
251 <conditional name="filter_control"> | |
252 <param name="filter_type" value="set_all_off" /> | |
253 </conditional> | |
254 <output name="variants" file="call-out2.vcf" lines_diff="4" /> | |
255 </test> | |
256 </tests> | |
257 <help><![CDATA[ | |
258 lofreq call: call variants from BAM file | |
259 | |
260 LoFreq is a fast and sensitive variant-caller for inferring SNVs and indels | |
261 from next-generation sequencing data. It makes full use of base-call qualities | |
262 and other sources of errors inherent in sequencing, which are usually ignored | |
263 by other methods or only used for filtering. | |
264 | |
265 LoFreq can run on almost any type of aligned sequencing data since no machine- | |
266 or sequencing-technology dependent thresholds are used. It automatically adapts | |
267 to changes in coverage and sequencing quality and can therefore be applied to a | |
268 variety of data-sets e.g. viral/quasispecies, bacterial, metagenomics or | |
269 somatic data. | |
270 | |
271 While the tool will often give reasonable results with default settings, a | |
272 variety of options let you control its exact behavior. These advanced options | |
273 can be subdivided into those affecting variant calling and those affecting | |
274 posterior filtering of the results. | |
275 | |
276 **Variant calling parameters** | |
277 | |
278 At the heart of LoFreq's variant caller is a **joint quality score** that is | |
279 computed for every site in every read (that survives filtering) and that | |
280 combines some or all of the following read and base quality measures: | |
281 | |
282 - Base/indel quality | |
283 | |
284 For any read, this is the Phred-scaled likelihood that the base mapped to a | |
285 given site does not represent a sequencing error. For every base, this score | |
286 got computed by the base caller of your sequencing platform and got | |
287 incorporated into your input dataset during read alignment. | |
288 | |
289 For insertions/deletions this is defined, analogously, as the Phred-scaled | |
290 likelihood that any inserted/deleted base is real, however, you are | |
291 responsible for adding indel qualities, which are required for indel | |
292 calling with lofreq, to your input. | |
293 | |
294 For doing so, you can use ``lofreq indelqual`` or GATK's BQSR. | |
295 | |
296 - Base/indel alignment quality | |
297 | |
298 For any read, this is the Phred-scaled likelihood that the read's base or | |
299 indel mapped to a given reference genome position is mapped to this position | |
300 correctly. | |
301 | |
302 The tool can calculate these scores for you on the fly. Alternatively, you | |
303 can precalculate them using ``lofreq alnqual``, which will incorporate them | |
304 into your input dataset. | |
305 | |
306 - Mapping quality | |
307 | |
308 The Phred-scaled likelihood that the read got mapped to the correct place | |
309 in the reference genome. This score got incorporated into your input dataset | |
310 by the aligner you used to map your reads. | |
311 | |
312 - Source quality | |
313 | |
314 This is the Phred-scaled likelihood that the given read comes from the | |
315 reference genome. The tool can calculate this score for you. | |
316 | |
317 | |
318 **Variant filter parameters** | |
319 | |
320 After generating a list of called variants, the tool can filter this list | |
321 based on: | |
322 | |
323 - the statistical significance of the variant calls | |
324 - strand-bias of reads supporting the variant | |
325 - coverage of the variant site | |
326 | |
327 While posterior filtering can help reduce false-positive variant calls, please | |
328 note that the separate ``lofreq filter``, which can be run on the output of | |
329 ``lofreq call``, has many more options for configuring filters. | |
330 | |
331 These are the different filter settings supported by the tool: | |
332 | |
333 *Preset filtering on QUAL score + coverage + strand bias* | |
334 | |
335 For variants to pass this filter, the following is required: | |
336 | |
337 - statistical significance of the variant call with a pvalue < 0.01 based on the | |
338 retransformed QUAL score of the variant and multiple-testing corrected using | |
339 a dynamically determined Bonferroni factor (based on the number of overall | |
340 variants considered during calling). | |
341 | |
342 - A strand-bias in supporting reads not significant under a FDR-corrected p | |
343 value of 0.001 and 85% of supporting reads mapped to the same strand of the | |
344 genome. | |
345 | |
346 - A coverage of the variant site of at least 10x. | |
347 | |
348 *Preset QUAL score-based filtering* | |
349 | |
350 Same QUAL-based significance filter as the default, but without the strand-bias | |
351 and coverage criteria | |
352 | |
353 *Strictly no filtering* | |
354 | |
355 Do not apply any filters, but produce the original list of all called variants. | |
356 You will almost always want to use ``lofreq filter`` to process the resulting | |
357 output. | |
358 | |
359 *Custom filter settings/combinations* | |
360 | |
361 Lets you define your own QUAL-based significance filter and, optionally, | |
362 combine it with the default strand-bias and coverage filters. | |
363 ]]></help> | |
364 <expand macro="citations" /> | |
365 </tool> |