Mercurial > repos > iuc > bcftools_csq
comparison macros.xml @ 0:2a6c13f8cc5a draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 9d03fe38504a35d11660dadb44cb1beee32fcf4e
author | iuc |
---|---|
date | Thu, 13 Apr 2017 17:47:16 -0400 |
parents | |
children | 7cbf3c66d4ad |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2a6c13f8cc5a |
---|---|
1 <macros> | |
2 <token name="@VERSION@">1.4.0</token> | |
3 <xml name="stdio"> | |
4 <stdio> | |
5 <exit_code range="1:" /> | |
6 <exit_code range=":-1" /> | |
7 <regex match="Error:" /> | |
8 <regex match="Exception:" /> | |
9 </stdio> | |
10 </xml> | |
11 <xml name="requirements"> | |
12 <requirements> | |
13 <requirement type="package" version="1.4">bcftools</requirement> | |
14 <requirement type="package" version="1.4">htslib</requirement> | |
15 <yield /> | |
16 </requirements> | |
17 </xml> | |
18 <xml name="samtools_requirement"> | |
19 <requirement type="package" version="1.3.1">samtools</requirement> | |
20 </xml> | |
21 <xml name="version_command"> | |
22 <version_command>bcftools 2>&amp;1 | grep 'Version:'</version_command> | |
23 </xml> | |
24 | |
25 <xml name="citations"> | |
26 <citations> | |
27 <citation type="doi">10.1093/bioinformatics/btp352</citation> | |
28 <yield /> | |
29 </citations> | |
30 </xml> | |
31 <token name="@BCFTOOLS_WIKI@">https://github.com/samtools/bcftools/wiki</token> | |
32 <token name="@BCFTOOLS_MANPAGE@">http://samtools.github.io/bcftools/bcftools.html</token> | |
33 <token name="@THREADS@"> | |
34 --threads \${GALAXY_SLOTS:-4} | |
35 </token> | |
36 <token name="@PREPARE_ENV@"> | |
37 <![CDATA[ | |
38 export BCFTOOLS_PLUGINS=`which bcftools | sed 's,bin/bcftools,libexec/bcftools,'`; | |
39 ]]> | |
40 </token> | |
41 <xml name="macro_input"> | |
42 <param name="input_file" type="data" format="vcf,vcf_bgzip,bcf,bcf_bgzip" label="VCF/BCF Data" /> | |
43 </xml> | |
44 <token name="@PREPARE_INPUT_FILE@"> | |
45 <![CDATA[ | |
46 ## Stage the input under a fixed local name (compressing/indexing it where needed); every branch ends with '&&' so the tool command can follow directly | |
47 #set $input_vcf = 'input.vcf.gz' | |
48 #if $input_file.is_of_type('vcf') | |
49 bgzip -c '$input_file' > $input_vcf && | |
50 bcftools index $input_vcf && | |
51 #elif $input_file.is_of_type('vcf_bgzip') | |
52 ln -s '$input_file' $input_vcf && | |
53 #elif $input_file.is_of_type('bcf') | |
54 #set $input_vcf = 'input.bcf' | |
55 ln -s '$input_file' $input_vcf && | |
56 #if $input_file.metadata.bcf_index: | |
57 ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi && | |
58 #else | |
59 bcftools index $input_vcf && | |
60 #end if | |
61 #elif $input_file.is_of_type('bcf_bgzip') | |
62 ln -s '$input_file' $input_vcf && | |
63 #end if | |
64 ]]> | |
65 </token> | |
66 <token name="@INPUT_FILE@"> | |
67 $input_vcf | |
68 </token> | |
69 | |
70 <xml name="macro_inputs"> | |
71 <param name="input_files" type="data" format="vcf,bcf" label="Other VCF/BCF Datasets" multiple="True" /> | |
72 </xml> | |
73 <token name="@PREPARE_INPUT_FILES@"> | |
74 <![CDATA[ | |
75 ## Stage every input under a numbered local name, index it where needed, and record the staged names in a list and a list file | |
76 #set $input_vcfs = [] | |
77 #set $vcfs_list_file = 'vcfs_list' | |
78 #for (i, input_file) in enumerate($input_files): | |
79 #set $input_vcf = 'input' + str($i) + '.vcf.gz' | |
80 #if $input_file.is_of_type('vcf') | |
81 bgzip -c '$input_file' > $input_vcf && | |
82 bcftools index $input_vcf && | |
83 #elif $input_file.is_of_type('vcf_bgzip') | |
84 ln -s '$input_file' $input_vcf && | |
85 #elif $input_file.is_of_type('bcf') | |
86 #set $input_vcf = 'input' + str($i) + '.bcf.gz' | |
87 ln -s '$input_file' $input_vcf && | |
88 #if $input_file.metadata.bcf_index: | |
89 ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi && | |
90 #else | |
91 bcftools index $input_vcf && | |
92 #end if | |
93 #elif $input_file.is_of_type('bcf_bgzip') | |
94 ln -s '$input_file' $input_vcf && | |
95 #end if | |
96 echo '$input_vcf' >> $vcfs_list_file && | |
97 $input_vcfs.append($input_vcf) | |
98 #end for | |
99 ]]> | |
100 </token> | |
101 <token name="@INPUT_FILES@"> | |
102 #echo ' '.join($input_vcfs)# | |
103 </token> | |
104 <token name="@INPUT_LIST_FILE@"> | |
105 $vcfs_list_file | |
106 </token> | |
107 | |
108 <xml name="macro_fasta_ref"> | |
109 <param name="fasta_ref" argument="--fasta-ref" type="data" format="data" label="Reference sequence in FASTA format" optional="True" /> | |
110 </xml> | |
111 <token name="@PREPARE_FASTA_REF@"> | |
112 <![CDATA[ | |
113 #set $input_fa_ref = None | |
114 #if 'fasta_ref' in $section and $section.fasta_ref: | |
115 #set $input_fa_ref = 'ref.fa' | |
116 ln -s '$section.fasta_ref' $input_fa_ref && | |
117 samtools faidx $input_fa_ref && | |
118 #end if | |
119 ]]> | |
120 </token> | |
121 <token name="@FASTA_REF@"> | |
122 #if $input_fa_ref is not None: | |
123 --fasta-ref $input_fa_ref | |
124 #elif 'fasta_ref' in $section and $section.fasta_ref: | |
125 --fasta-ref '${section.fasta_ref}' | |
126 #end if | |
127 </token> | |
128 | |
129 <xml name="macro_ref_fasta"> | |
130 <conditional name="reference_source"> | |
131 <param name="reference_source_selector" type="select" label="Choose the source for the reference genome"> | |
132 <option value="cached">Locally cached</option> | |
133 <option value="history">History</option> | |
134 </param> | |
135 <when value="cached"> | |
136 <param name="ref_file" type="select" label="Select reference genome"> | |
137 <options from_data_table="fasta_indexes"> | |
138 <!--<filter type="data_meta" key="dbkey" ref="input_bam" column="value"/>--> | |
139 </options> | |
140 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> | |
141 </param> | |
142 </when> | |
143 <when value="history"> <!-- FIX ME!!!! --> | |
144 <param name="ref_file" type="data" format="fasta" label="Using reference file" /> | |
145 </when> | |
146 </conditional> | |
147 </xml> | |
148 | |
149 | |
150 <xml name="macro_AF_file"> | |
151 <param name="AF_file" argument="--AF-file" type="data" format="tabular" label="Allele frequencies file" optional="True" help="Tab-delimited file containing the columns CHR,POS,REF,ALT,AF" /> | |
152 </xml> | |
153 <!-- This may need to bgzip and tabix the file --> | |
154 <token name="@PREPARE_AF_FILE@"> | |
155 <![CDATA[ | |
156 #if 'AF_file' in $section and $section.AF_file: | |
157 #pass | |
158 #end if | |
159 ]]> | |
160 </token> | |
161 <token name="@AF_FILE@"> | |
162 #if 'AF_file' in $section and $section.AF_file: | |
163 --AF-file '${section.AF_file}' | |
164 #end if | |
165 </token> | |
166 | |
167 <xml name="macro_estimate_AF"> | |
168 <param name="estimate_AF" argument="--estimate-AF" type="data" format="data" label="Estimate allele frequency" optional="True" help="calculate AC,AN counts on the fly, using either all samples (&quot;-&quot;) or samples listed in &lt;file&gt;" /> | |
169 </xml> | |
170 <token name="@ESTIMATE_AF@"> | |
171 #if 'estimate_AF' in $section and $section.estimate_AF: | |
172 --estimate-AF "${section.estimate_AF}" | |
173 #end if | |
174 </token> | |
175 | |
176 <xml name="macro_exons_file"> | |
177 <param name="exons_file" type="data" format="tabular" label="exons file" optional="True" help="tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed)" /> | |
178 </xml> | |
179 <token name="@PREPARE_EXONS_FILE@"> | |
180 <![CDATA[ | |
181 #set $exons_path = None | |
182 #if 'exons_file' in $section and $section.exons_file: | |
183 #set $exons_path = 'exons_file.tab.gz' | |
184 bgzip -c "$section.exons_file" > $exons_path && | |
185 tabix -s 1 -b 2 -e 3 $exons_path && | |
186 #end if | |
187 ]]> | |
188 </token> | |
189 <token name="@EXONS_FILE@"> | |
190 #if 'exons_file' in $section and $section.exons_file: | |
191 --exons $exons_path | |
192 #end if | |
193 </token> | |
194 | |
195 <xml name="macro_ploidy_file"> | |
196 <param name="ploidy_file" type="data" format="tabular" label="Ploidy file" optional="True" help="tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY" /> | |
197 </xml> | |
198 <token name="@PLOIDY_FILE@"> | |
199 #if 'ploidy_file' in $section and $section.ploidy_file: | |
200 --ploidy "${section.ploidy_file}" | |
201 #end if | |
202 </token> | |
203 | |
204 <xml name="macro_collapse_opt_none"> | |
205 <option value="none">none - require the exact same set of alleles in all files</option> | |
206 </xml> | |
207 <xml name="macro_collapse_opt_id"> | |
208 <option value="id">id - only records with identical ID column are compatible. </option> | |
209 </xml> | |
210 <xml name="macro_collapse"> | |
211 <param name="collapse" type="select" label="Collapse" optional="True" help="Controls how to treat records with duplicate positions and defines compatible records across multiple input files"> | |
212 <option value="snps">snps - allow different alleles, as long as they all are SNPs</option> | |
213 <option value="indels">indels - allow different alleles, as long as they all are indels</option> | |
214 <option value="both">both - indels and snps </option> | |
215 <option value="some">some - at least some of the ALTs must match</option> | |
216 <option value="any">any - any combination of alleles</option> | |
217 <yield/> | |
218 </param> | |
219 </xml> | |
220 <token name="@COLLAPSE@"> | |
221 #if $section.collapse: | |
222 --collapse ${section.collapse} | |
223 #end if | |
224 </token> | |
225 | |
226 <xml name="macro_apply_filters"> | |
227 <param name="apply_filters" type="text" value="" label="Apply Filters" optional="true" | |
228 help="(-f --apply-filters) Skip sites where FILTER column does not contain any of the strings listed (e.g. &quot;PASS,.&quot;)"> | |
229 <validator type="regex" message="FILTER terms separated by commas">^([^ \t\n\r\f\v,]+(,[^ \t\n\r\f\v,]+)*)?$</validator> | |
230 </param> | |
231 </xml> | |
232 <token name="@APPLY_FILTERS@"> | |
233 #if $section.apply_filters: | |
234 --apply-filters '${section.apply_filters}' | |
235 #end if | |
236 </token> | |
237 | |
238 <xml name="macro_select_output_type"> | |
239 <param name="output_type" type="select"> | |
240 <option value="b">compressed BCF</option> | |
241 <!-- no galaxy datatypes for these | |
242 <option value="u">uncompressed BCF</option> | |
243 <option value="z">compressed VCF</option> | |
244 --> | |
245 <option value="v">uncompressed VCF</option> | |
246 </param> | |
247 </xml> | |
248 <token name="@OUTPUT_TYPE@"> | |
249 #if str($output_type) != "__none__": | |
250 --output-type '${output_type}' | |
251 #end if | |
252 </token> | |
253 | |
254 <xml name="macro_vcf_output"> | |
255 <data name="output_file" format="vcf"> | |
256 <change_format> | |
257 <when input="output_type" value="b" format="bcf" /> | |
258 <when input="output_type" value="u" format="bcf" /> | |
259 <when input="output_type" value="z" format="vcf_bgzip" /> | |
260 <when input="output_type" value="v" format="vcf" /> | |
261 </change_format> | |
262 </data> | |
263 </xml> | |
264 | |
265 <xml name="macro_regions"> | |
266 <conditional name="regions"> | |
267 <param name="regions_src" type="select" label="Regions"> | |
268 <option value="__none__">None</option> | |
269 <option value="regions">regions</option> | |
270 <option value="regions_file">regions-file</option> | |
271 </param> | |
272 <when value="__none__"/> | |
273 <when value="regions"> | |
274 <param name="regions" type="text" value="" label="restrict to comma-separated list of regions" optional="true" | |
275 help="Each region is specified as: chr or chr:pos or chr:from-to"> | |
276 <validator type="regex" message="Regions must be a comma-separated list of chr, chr:pos or chr:from-to">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator> | |
277 </param> | |
278 </when> | |
279 <when value="regions_file"> | |
280 <param name="regions_file" type="data" format="vcf,bed,tabular" label="Regions File" optional="True" help="restrict to regions listed in a file" /> | |
281 </when> | |
282 </conditional> | |
283 </xml> | |
284 <token name="@PREPARE_REGIONS_FILE@"> | |
285 <![CDATA[ | |
286 #set $regions_path = None | |
287 #if 'regions' in $section | |
288 #if $section.regions.regions_src == 'regions_file' and $section.regions.regions_file: | |
289 #if $section.regions.regions_file.ext.startswith('bed'): | |
290 #set $regions_path = 'regions_file.bed' | |
291 ln -s '$section.regions.regions_file' $regions_path && | |
292 #end if | |
293 #end if | |
294 #end if | |
295 ]]> | |
296 </token> | |
297 <token name="@REGIONS@"> | |
298 #if $section.regions.regions_src == 'regions' and $section.regions.regions != '': | |
299 --regions '$section.regions.regions' | |
300 #elif $section.regions.regions_src == 'regions_file' and $section.regions.regions_file: | |
301 #if $regions_path is not None: | |
302 --regions-file '$regions_path' | |
303 #else: | |
304 --regions-file '$section.regions.regions_file' | |
305 #end if | |
306 #end if | |
307 </token> | |
308 <xml name="macro_targets_file"> | |
309 <param name="targets_file" type="data" format="tabular" label="Targets File" help="restrict to targets listed in a file" > | |
310 <yield/> | |
311 </param> | |
312 <param name="invert_targets_file" type="boolean" truevalue="^" falsevalue="" label="Invert Targets" help="inverts the query/filtering applied by the target file selection" /> | |
313 </xml> | |
314 <token name="@PREPARE_TARGETS_FILE@"> | |
315 <![CDATA[ | |
316 #set $targets_path = None | |
317 #if 'targets' in $section | |
318 #if $section.targets.targets_src == 'targets_file': | |
319 #set $targets_path = 'targets_file.tab.gz' | |
320 bgzip -c "$section.targets.targets_file" > $targets_path && | |
321 tabix -s 1 -b 2 -e 2 $targets_path && | |
322 #end if | |
323 #elif $section.targets_file: | |
324 #set $targets_path = 'targets_file.tab.gz' | |
325 bgzip -c "$section.targets_file" > $targets_path && | |
326 tabix -s 1 -b 2 -e 2 $targets_path && | |
327 #end if | |
328 ]]> | |
329 </token> | |
330 <token name="@TARGETS_FILE@"> | |
331 <![CDATA[ | |
332 #if $targets_path is not None: | |
333 --targets-file "${section.invert_targets_file}${targets_path}" | |
334 #elif $section.targets_file: | |
335 --targets-file "${section.invert_targets_file}${section.targets_file}" | |
336 #end if | |
337 ]]> | |
338 </token> | |
339 | |
340 <xml name="macro_targets"> | |
341 <conditional name="targets"> | |
342 <param name="targets_src" type="select" label="Targets"> | |
343 <option value="__none__">None</option> | |
344 <option value="targets">targets</option> | |
345 <option value="targets_file">targets-file</option> | |
346 </param> | |
347 <when value="__none__"/> | |
348 <when value="targets"> | |
349 <param name="targets" type="text" value="" label="Restrict to comma-separated list of targets" optional="true" | |
350 help="Each target is specified as: chr or chr:pos or chr:from-to"> | |
351 <validator type="regex" message="Targets must be a comma-separated list of chr, chr:pos or chr:from-to">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator> | |
352 </param> | |
353 <param name="invert_targets_file" type="boolean" truevalue="^" falsevalue="" label="Invert Targets" help="inverts the query/filtering applied by the targets" /> | |
354 </when> | |
355 <when value="targets_file"> | |
356 <expand macro="macro_targets_file"> | |
357 </expand> | |
358 </when> | |
359 </conditional> | |
360 </xml> | |
361 <token name="@TARGETS@"> | |
362 <![CDATA[ | |
363 #if $targets_path: | |
364 --targets-file "${section.targets.invert_targets_file}${targets_path}" | |
365 #else: | |
366 #if $section.targets.targets_src == 'targets' and $section.targets.targets != '': | |
367 --targets '${section.targets.invert_targets_file}${section.targets.targets}' | |
368 #elif $section.targets.targets_src == 'targets_file' and $section.targets.targets_file: | |
369 --targets-file "${section.targets.invert_targets_file}${section.targets.targets_file}" | |
370 #end if | |
371 #end if | |
372 ]]> | |
373 </token> | |
374 | |
375 <xml name="macro_samples"> | |
376 <param name="samples" type="text" value="" label="Samples" optional="true" | |
377 help="(-s) comma separated list of samples to annotate (or exclude)"> | |
378 <validator type="regex" message="">^(\w+(,\w+)*)?$</validator> | |
379 </param> | |
380 <param name="invert_samples" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples" | |
381 help="inverts the query/filtering applied by Samples (adds &quot;^&quot; prefix to exclude)" /> | |
382 <param name="samples_file" type="data" format="tabular" label="Samples File" optional="True" | |
383 help="(-S) file of samples to include" /> | |
384 <param name="invert_samples_file" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples File" | |
385 help="inverts the query/filtering applied by Samples File" /> | |
386 </xml> | |
387 <token name="@SAMPLES@"> | |
388 #set $samples_defined = False | |
389 #if str($section.samples) != '': | |
390 #set $samples_defined = True | |
391 --samples '${section.invert_samples}${section.samples}' | |
392 #end if | |
393 #if $section.samples_file: | |
394 #set $samples_defined = True | |
395 --samples-file "${section.invert_samples_file}${section.samples_file}" | |
396 #end if | |
397 </token> | |
398 | |
399 <xml name="macro_sample"> | |
400 <param name="sample" type="text" label="Sample" optional="True" help="apply variants of the given sample" /> | |
401 </xml> | |
402 <token name="@SAMPLE@"> | |
403 #if $section.sample: | |
404 --sample '${section.sample}' | |
405 #end if | |
406 </token> | |
407 | |
408 | |
409 <xml name="macro_include"> | |
410 <param name="include" type="text" label="Include" optional="True" help="(-i) select sites for which the expression is true"> | |
411 <validator type="regex" message="Single quote not allowed">^[^']*$</validator> | |
412 <sanitizer sanitize="False"/> | |
413 </param> | |
414 </xml> | |
415 <token name="@INCLUDE@"> | |
416 #if $section.include: | |
417 --include '${section.include}' | |
418 #end if | |
419 </token> | |
420 | |
421 <xml name="macro_exclude"> | |
422 <param name="exclude" type="text" label="Exclude" optional="True" help="(-e) exclude sites for which the expression is true"> | |
423 <validator type="regex" message="Single quote not allowed">^[^']*$</validator> | |
424 <sanitizer sanitize="False"/> | |
425 </param> | |
426 </xml> | |
427 <token name="@EXCLUDE@"> | |
428 #if $section.exclude: | |
429 --exclude '${section.exclude}' | |
430 #end if | |
431 </token> | |
432 | |
433 <xml name="macro_columns"> | |
434 <param name="columns" type="text" value="" label="Columns" optional="true" | |
435 help="list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details"> | |
436 <validator type="regex" message="COLUMN names separated by commas">^([^,]+(,[^,]+)*)?$</validator> | |
437 </param> | |
438 </xml> | |
439 <token name="@COLUMNS@"> | |
440 #if $section.columns != '': | |
441 --columns '${section.columns}' | |
442 #end if | |
443 </token> | |
444 | |
445 <xml name="macro_haploid2diploid"> | |
446 <param name="haploid2diploid" type="boolean" truevalue="--haploid2diploid" falsevalue="" label="Haploid2Diploid" help="convert haploid genotypes to diploid homozygotes" /> | |
447 </xml> | |
448 | |
449 <xml name="macro_vcf_ids"> | |
450 <param name="vcf_ids" type="boolean" truevalue="--vcf-ids" falsevalue="" label="Vcf Ids" help="output VCF IDs instead of CHROM:POS_REF_ALT" /> | |
451 </xml> | |
452 <token name="@VCF_IDS@"> | |
453 ${section.vcf_ids} | |
454 </token> | |
455 | |
456 <token name="@OUTPUT_HELP@"> | |
457 <![CDATA[ | |
458 Output Type | |
459 ----------- | |
460 | |
461 Output compressed BCF (b), or uncompressed VCF (v). | |
462 Use the BCF option when piping between bcftools subcommands to speed up | |
463 performance by removing unnecessary compression/decompression | |
464 and VCF<->BCF conversion. | |
465 | |
466 This Galaxy tool recommends using the compressed BCF format | |
467 as piping is not implemented, and uncompressed data would | |
468 use unnecessary amounts of space. | |
469 ]]></token> | |
470 <token name="@REGIONS_HELP@"> | |
471 <![CDATA[ | |
472 Region Selections | |
473 ----------------- | |
474 | |
475 Regions can be specified in a VCF, | |
476 BED, or tab-delimited file (the default). The columns of the | |
477 tab-delimited file are: CHROM, POS, and, optionally, POS_TO, | |
478 where positions are 1-based and inclusive. Uncompressed | |
479 files are stored in memory, while bgzip-compressed and | |
480 tabix-indexed region files are streamed. Note that sequence | |
481 names must match exactly, "chr20" is not the same as "20". | |
482 Also note that chromosome ordering in FILE will be | |
483 respected, the VCF will be processed in the order in which | |
484 chromosomes first appear in FILE. However, within | |
485 chromosomes, the VCF will always be processed in ascending | |
486 genomic coordinate order no matter what order they appear in | |
487 FILE. Note that overlapping regions in FILE can result in | |
488 duplicated out of order positions in the output. This option | |
489 requires indexed VCF/BCF files. | |
490 ]]></token> | |
491 <token name="@TARGETS_HELP@"><![CDATA[ | |
492 Targets | |
493 ------- | |
494 | |
495 Similar to regions, but the next position is accessed by streaming the whole | |
496 VCF/BCF rather than using the tbi/csi index. Both regions and targets options can be | |
497 applied simultaneously: regions uses the index to jump to a region and targets discards | |
498 positions which are not in the targets. Unlike regions, targets can be prefixed with | |
499 "^" to request logical complement. For example, "^X,Y,MT" indicates that | |
500 sequences X, Y and MT should be skipped. Yet another difference between the two | |
501 is that regions checks both start and end positions of indels, whereas targets checks | |
502 start positions only. | |
503 | |
504 For the bcftools call command, with the option -C alleles, third column of the | |
505 targets file must be comma-separated list of alleles, starting with the | |
506 reference allele. Note that the file must be compressed and indexed. Such a file | |
507 can be easily created from a VCF using:: | |
508 | |
509 bcftools query -f'%CHROM\t%POS\t%REF,%ALT\n' file.vcf | bgzip -c > als.tsv.gz && tabix -s1 -b2 -e2 als.tsv.gz | |
510 ]]> | |
511 <!-- TODO: galaxy-ify --> | |
512 </token> | |
513 | |
514 | |
515 <token name="@COLLAPSE_HELP@"> | |
516 Collapse | |
517 -------- | |
518 | |
519 Controls how to treat records with duplicate positions and defines compatible | |
520 records across multiple input files. Here by "compatible" we mean records which | |
521 should be considered as identical by the tools. For example, when performing | |
522 line intersections, the desire may be to consider as identical all sites with | |
523 matching positions (bcftools isec -c all), or only sites with matching variant | |
524 type (bcftools isec -c snps -c indels), or only sites with all alleles | |
525 identical (bcftools isec -c none). | |
526 | |
527 | |
528 +------------+----------------------------------------------------------------+ | |
529 | Flag value | Result | | |
530 +============+================================================================+ | |
531 | none | only records with identical REF and ALT alleles are compatible | | |
532 +------------+----------------------------------------------------------------+ | |
533 | some | only records where some subset of ALT alleles match are | | |
534 | | compatible | | |
535 +------------+----------------------------------------------------------------+ | |
536 | all | all records are compatible, regardless of whether the ALT | | |
537 | | alleles match or not. In the case of records with the same | | |
538 | | position, only the first will be considered and appear on | |
539 | | output. | | |
540 +------------+----------------------------------------------------------------+ | |
541 | snps | any SNP records are compatible, regardless of whether the ALT | | |
542 | | alleles match or not. For duplicate positions, only the first | | |
543 | | SNP record will be considered and appear on output. | | |
544 +------------+----------------------------------------------------------------+ | |
545 | indels | all indel records are compatible, regardless of whether the | | |
546 | | REF and ALT alleles match or not. For duplicate positions, | | |
547 | | only the first indel record will be considered and appear on | | |
548 | | output. | | |
549 +------------+----------------------------------------------------------------+ | |
550 | both | abbreviation of "-c indels -c snps" | | |
551 +------------+----------------------------------------------------------------+ | |
552 | id | only records with identical ID column are compatible. | | |
553 | | Supported by bcftools merge only. | |
554 +------------+----------------------------------------------------------------+ | |
555 </token> | |
556 | |
557 <token name="@EXPRESSIONS_HELP@"> | |
558 <![CDATA[ | |
559 Expressions | |
560 ----------- | |
561 | |
562 Valid expressions may contain: | |
563 | |
564 - numerical constants, string constants | |
565 | |
566 :: | |
567 | |
568 1, 1.0, 1e-4 | |
569 "String" | |
570 | |
571 - arithmetic operators | |
572 | |
573 :: | |
574 | |
575 +,*,-,/ | |
576 | |
577 - comparison operators | |
578 | |
579 :: | |
580 | |
581 == (same as =), >, >=, <=, <, != | |
582 | |
583 - regex operators "~" and its negation "!~" | |
584 | |
585 :: | |
586 | |
587 INFO/HAYSTACK ~ "needle" | |
588 | |
589 - parentheses | |
590 | |
591 :: | |
592 | |
593 (, ) | |
594 | |
595 - logical operators | |
596 | |
597 :: | |
598 | |
599 && (same as &), ||, | | |
600 | |
601 - INFO tags, FORMAT tags, column names | |
602 | |
603 :: | |
604 | |
605 INFO/DP or DP | |
606 FORMAT/DV, FMT/DV, or DV | |
607 FILTER, QUAL, ID, REF, ALT[0] | |
608 | |
609 - 1 (or 0) to test the presence (or absence) of a flag | |
610 | |
611 :: | |
612 | |
613 FlagA=1 && FlagB=0 | |
614 | |
615 - "." to test missing values | |
616 | |
617 :: | |
618 | |
619 DP=".", DP!=".", ALT="." | |
620 | |
621 - missing genotypes can be matched regardless of phase and ploidy (".|.", "./.", ".") using this expression | |
622 | |
623 :: | |
624 | |
625 GT="." | |
626 | |
627 - TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,other) | |
628 | |
629 :: | |
630 | |
631 TYPE="indel" | TYPE="snp" | |
632 | |
633 - array subscripts, "*" for any field | |
634 | |
635 :: | |
636 | |
637 (DP4[0]+DP4[1])/(DP4[2]+DP4[3]) > 0.3 | |
638 DP4[*] == 0 | |
639 CSQ[*] ~ "missense_variant.*deleterious" | |
640 | |
641 - function on FORMAT tags (over samples) and INFO tags (over vector fields) | |
642 | |
643 :: | |
644 | |
645 MAX, MIN, AVG, SUM, STRLEN, ABS | |
646 | |
647 - variables calculated on the fly if not present: number of alternate alleles; number of samples; count of alternate alleles; minor allele count (similar to AC but is always smaller than 0.5); frequency of alternate alleles (AF=AC/AN); frequency of minor alleles (MAF=MAC/AN); number of alleles in called genotypes | |
648 | |
649 :: | |
650 | |
651 N_ALT, N_SAMPLES, AC, MAC, AF, MAF, AN | |
652 | |
653 **Notes:** | |
654 | |
655 - String comparisons and regular expressions are case-insensitive | |
656 - If the subscript "*" is used in regular expression search, the whole field | |
657 is treated as one string. For example, the regex ``STR[*]~"B,C"`` will be | |
658 true for the string vector INFO/STR=AB,CD. | |
659 - Variables and function names are case-insensitive, but not tag names. For | |
660 example, "qual" can be used instead of "QUAL", "strlen()" instead of | |
661 "STRLEN()" , but not "dp" instead of "DP". | |
662 | |
663 **Examples:** | |
664 | |
665 :: | |
666 | |
667 MIN(DV)>5 | |
668 MIN(DV/DP)>0.3 | |
669 MIN(DP)>10 & MIN(DV)>3 | |
670 FMT/DP>10 & FMT/GQ>10 .. both conditions must be satisfied within one sample | |
671 FMT/DP>10 && FMT/GQ>10 .. the conditions can be satisfied in different samples | |
672 QUAL>10 | FMT/GQ>10 .. selects only GQ>10 samples | |
673 QUAL>10 || FMT/GQ>10 .. selects all samples at QUAL>10 sites | |
674 TYPE="snp" && QUAL>=10 && (DP4[2]+DP4[3] > 2) | |
675 MIN(DP)>35 && AVG(GQ)>50 | |
676 ID=@file .. selects lines with ID present in the file | |
677 ID!=@~/file .. skip lines with ID present in the ~/file | |
678 MAF[0]<0.05 .. select rare variants at 5% cutoff | |
679 ]]></token> | |
680 </macros> |