comparison macros.xml @ 0:12f7c5315f7f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 9d03fe38504a35d11660dadb44cb1beee32fcf4e
author iuc
date Thu, 13 Apr 2017 17:41:11 -0400
parents
children 9c711df3258d
comparison
equal deleted inserted replaced
-1:000000000000 0:12f7c5315f7f
1 <macros>
2 <token name="@VERSION@">1.4.0</token>
3 <xml name="stdio">
4 <stdio>
5 <exit_code range="1:" />
6 <exit_code range=":-1" />
7 <regex match="Error:" />
8 <regex match="Exception:" />
9 </stdio>
10 </xml>
11 <xml name="requirements">
12 <requirements>
13 <requirement type="package" version="1.4">bcftools</requirement>
14 <requirement type="package" version="1.4">htslib</requirement>
15 <yield />
16 </requirements>
17 </xml>
18 <xml name="samtools_requirement">
19 <requirement type="package" version="1.3.1">samtools</requirement>
20 </xml>
21 <xml name="version_command">
22 <version_command>bcftools 2&gt;&amp;1 | grep 'Version:'</version_command>
23 </xml>
24
25 <xml name="citations">
26 <citations>
27 <citation type="doi">10.1093/bioinformatics/btp352</citation>
28 <yield />
29 </citations>
30 </xml>
31 <token name="@BCFTOOLS_WIKI@">https://github.com/samtools/bcftools/wiki</token>
32 <token name="@BCFTOOLS_MANPAGE@">http://samtools.github.io/bcftools/bcftools.html</token>
33 <token name="@THREADS@">
34 --threads \${GALAXY_SLOTS:-4}
35 </token>
36 <token name="@PREPARE_ENV@">
37 <![CDATA[
38 export BCFTOOLS_PLUGINS=`which bcftools | sed 's,bin/bcftools,libexec/bcftools,'`;
39 ]]>
40 </token>
41 <xml name="macro_input">
42 <param name="input_file" type="data" format="vcf,vcf_bgzip,bcf,bcf_bgzip" label="VCF/BCF Data" />
43 </xml>
44 <token name="@PREPARE_INPUT_FILE@">
45 <![CDATA[
46 ## Stage the input under a fixed local name: plain VCF is bgzipped and indexed, the other types are symlinked (BCF also gets its index linked or built). Every shell line ends in && so the command that follows this token chains onto it.
47 #set $input_vcf = 'input.vcf.gz'
48 #if $input_file.is_of_type('vcf')
49 bgzip -c '$input_file' > $input_vcf &&
50 bcftools index $input_vcf &&
51 #elif $input_file.is_of_type('vcf_bgzip')
52 ln -s '$input_file' $input_vcf &&
53 #elif $input_file.is_of_type('bcf')
54 #set $input_vcf = 'input.bcf'
55 ln -s '$input_file' $input_vcf &&
56 #if $input_file.metadata.bcf_index:
57 ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi &&
58 #else
59 bcftools index $input_vcf &&
60 #end if
61 #elif $input_file.is_of_type('bcf_bgzip')
62 ln -s '$input_file' $input_vcf &&
63 #end if
64 ]]>
65 </token>
66 <token name="@INPUT_FILE@">
67 $input_vcf
68 </token>
69
70 <xml name="macro_inputs">
71 <param name="input_files" type="data" format="vcf,bcf" label="Other VCF/BCF Datasets" multiple="True" />
72 </xml>
73 <token name="@PREPARE_INPUT_FILES@">
74 <![CDATA[
75 ## Stage each input as input<i>.vcf.gz / input<i>.bcf.gz (compressing, indexing or symlinking as needed), append its name to $vcfs_list_file and collect it in $input_vcfs. Every shell line ends in && so consecutive commands chain.
76 #set $input_vcfs = []
77 #set $vcfs_list_file = 'vcfs_list'
78 #for (i, input_file) in enumerate($input_files):
79 #set $input_vcf = 'input' + str($i) + '.vcf.gz'
80 #if $input_file.is_of_type('vcf')
81 bgzip -c '$input_file' > $input_vcf &&
82 bcftools index $input_vcf &&
83 #elif $input_file.is_of_type('vcf_bgzip')
84 ln -s '$input_file' $input_vcf &&
85 #elif $input_file.is_of_type('bcf')
86 #set $input_vcf = 'input' + str($i) + '.bcf.gz'
87 ln -s '$input_file' $input_vcf &&
88 #if $input_file.metadata.bcf_index:
89 ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi &&
90 #else
91 bcftools index $input_vcf &&
92 #end if
93 #elif $input_file.is_of_type('bcf_bgzip')
94 ln -s '$input_file' $input_vcf &&
95 #end if
96 echo '$input_vcf' >> $vcfs_list_file &&
97 $input_vcfs.append($input_vcf)
98 #end for
99 ]]>
100 </token>
101 <token name="@INPUT_FILES@">
102 #echo ' '.join($input_vcfs)#
103 </token>
104 <token name="@INPUT_LIST_FILE@">
105 $vcfs_list_file
106 </token>
107
108 <xml name="macro_fasta_ref">
109 <param name="fasta_ref" argument="--fasta-ref" type="data" format="data" label="Reference sequence in FASTA format" optional="True" />
110 </xml>
111 <token name="@PREPARE_FASTA_REF@">
112 <![CDATA[
113 #set $input_fa_ref = None
114 #if 'fasta_ref' in $section and $section.fasta_ref:
115 #set $input_fa_ref = 'ref.fa'
116 ln -s '$section.fasta_ref' $input_fa_ref &&
117 samtools faidx $input_fa_ref &&
118 #end if
119 ]]>
120 </token>
121 <token name="@FASTA_REF@">
122 #if $input_fa_ref is not None:
123 --fasta-ref $input_fa_ref
124 #elif 'fasta_ref' in $section and $section.fasta_ref:
125 --fasta-ref '${section.fasta_ref}'
126 #end if
127 </token>
128
129 <xml name="macro_ref_fasta">
130 <conditional name="reference_source">
131 <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
132 <option value="cached">Locally cached</option>
133 <option value="history">History</option>
134 </param>
135 <when value="cached">
136 <param name="ref_file" type="select" label="Select reference genome">
137 <options from_data_table="fasta_indexes">
138 <!--<filter type="data_meta" key="dbkey" ref="input_bam" column="value"/>-->
139 </options>
140 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
141 </param>
142 </when>
143 <when value="history"> <!-- FIX ME!!!! -->
144 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
145 </when>
146 </conditional>
147 </xml>
148
149
150 <xml name="macro_AF_file">
151 <param name="AF_file" argument="--AF-file" type="data" format="tabular" label="Allele frequencies file" optional="True" help="Tab-delimited file containing the columns CHR,POS,REF,ALT,AF" />
152 </xml>
153 <!-- This may need to bgzip and tabix the file -->
154 <token name="@PREPARE_AF_FILE@">
155 <![CDATA[
156 #if 'AF_file' in $section and $section.AF_file:
157 #pass
158 #end if
159 ]]>
160 </token>
161 <token name="@AF_FILE@">
162 #if 'AF_file' in $section and $section.AF_file:
163 --AF-file '${section.AF_file}'
164 #end if
165 </token>
166
167 <xml name="macro_estimate_AF">
168 <param name="estimate_AF" argument="--estimate-AF" type="data" format="data" label="Estimate allele frequency" optional="True" help="calculate AC,AN counts on the fly, using either all samples (&quot;-&quot;) or samples listed in &lt;file&gt;" />
169 </xml>
170 <!-- Emits the estimate-AF option with the selected dataset path, single-quoted like the other dataset-path tokens in this file; emits nothing when the section has no estimate_AF value. --> <token name="@ESTIMATE_AF@">
171 #if 'estimate_AF' in $section and $section.estimate_AF:
172 --estimate-AF '${section.estimate_AF}'
173 #end if
174 </token>
175
176 <xml name="macro_exons_file">
177 <param name="exons_file" type="data" format="tabular" label="exons file" optional="True" help="tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed)" />
178 </xml>
179 <token name="@PREPARE_EXONS_FILE@">
180 <![CDATA[
181 #set $exons_path = None
182 #if 'exons_file' in $section and $section.exons_file:
183 #set $exons_path = 'exons_file.tab.gz'
184 bgzip -c "$section.exons_file" > $exons_path &&
185 tabix -s 1 -b 2 -e 3 $exons_path &&
186 #end if
187 ]]>
188 </token>
189 <token name="@EXONS_FILE@">
190 #if 'exons_file' in $section and $section.exons_file:
191 --exons $exons_path
192 #end if
193 </token>
194
195 <xml name="macro_ploidy_file">
196 <param name="ploidy_file" type="data" format="tabular" label="Ploidy file" optional="True" help="tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY" />
197 </xml>
198 <!-- Emits the ploidy option with the selected dataset path, single-quoted like the other dataset-path tokens in this file; emits nothing when the section has no ploidy_file value. --> <token name="@PLOIDY_FILE@">
199 #if 'ploidy_file' in $section and $section.ploidy_file:
200 --ploidy '${section.ploidy_file}'
201 #end if
202 </token>
203
204 <xml name="macro_collapse_opt_none">
205 <option value="none">none - require the exact same set of alleles in all files</option>
206 </xml>
207 <xml name="macro_collapse_opt_id">
208 <option value="id">id - only records with identical ID column are compatible. </option>
209 </xml>
210 <xml name="macro_collapse">
211 <param name="collapse" type="select" label="Collapse" optional="True" help="Controls how to treat records with duplicate positions and defines compatible records across multiple input files">
212 <option value="snps">snps - allow different alleles, as long as they all are SNPs</option>
213 <option value="indels">indels - allow different alleles, as long as they all are indels</option>
214 <option value="both">both - indels and snps </option>
215 <option value="some">some - at least some of the ALTs must match</option>
216 <option value="any">any - any combination of alleles</option>
217 <yield/>
218 </param>
219 </xml>
220 <token name="@COLLAPSE@">
221 #if $section.collapse:
222 --collapse ${section.collapse}
223 #end if
224 </token>
225
226 <xml name="macro_apply_filters">
227 <param name="apply_filters" type="text" value="" label="Apply Filters" optional="true"
228 help="(-f --apply-filters) Skip sites where FILTER column does not contain any of the strings listed (e.g. &quot;PASS,.&quot;)">
229 <validator type="regex" message="FILTER terms separated by commas">^([^ \t\n\r\f\v,]+(,[^ \t\n\r\f\v,]+)*)?$</validator>
230 </param>
231 </xml>
232 <token name="@APPLY_FILTERS@">
233 #if $section.apply_filters:
234 --apply-filters '${section.apply_filters}'
235 #end if
236 </token>
237
238 <xml name="macro_select_output_type">
239 <param name="output_type" type="select">
240 <option value="b">compressed BCF</option>
241 <!-- no galaxy datatypes for these
242 <option value="u">uncompressed BCF</option>
243 <option value="z">compressed VCF</option>
244 -->
245 <option value="v">uncompressed VCF</option>
246 </param>
247 </xml>
248 <token name="@OUTPUT_TYPE@">
249 #if str($output_type) != "__none__":
250 --output-type '${output_type}'
251 #end if
252 </token>
253
254 <xml name="macro_vcf_output">
255 <data name="output_file" format="vcf">
256 <change_format>
257 <when input="output_type" value="b" format="bcf" />
258 <when input="output_type" value="u" format="bcf" />
259 <when input="output_type" value="z" format="vcf_bgzip" />
260 <when input="output_type" value="v" format="vcf" />
261 </change_format>
262 </data>
263 </xml>
264
265 <!-- Conditional "regions" input: none, a typed comma-separated region list (checked by the regex validator), or a regions dataset. --> <xml name="macro_regions">
266 <conditional name="regions">
267 <param name="regions_src" type="select" label="Regions">
268 <option value="__none__">None</option>
269 <option value="regions">regions</option>
270 <option value="regions_file">regions-file</option>
271 </param>
272 <when value="__none__"/>
273 <when value="regions">
274 <param name="regions" type="text" value="" label="restrict to comma-separated list of regions" optional="true"
275 help="Each region is specified as: chr or chr:pos or chr:from-to">
276 <validator type="regex" message="Regions must be a comma-separated list, each given as chr, chr:pos or chr:from-to">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
277 </param>
278 </when>
279 <when value="regions_file">
280 <param name="regions_file" type="data" format="vcf,bed,tabular" label="Regions File" optional="True" help="restrict to regions listed in a file" />
281 </when>
282 </conditional>
283 </xml>
284 <token name="@PREPARE_REGIONS_FILE@">
285 <![CDATA[
286 #set $regions_path = None
287 #if 'regions' in $section
288 #if $section.regions.regions_src == 'regions_file' and $section.regions.regions_file:
289 #if $section.regions.regions_file.ext.startswith('bed'):
290 #set $regions_path = 'regions_file.bed'
291 ln -s '$section.regions.regions_file' $regions_path &&
292 #end if
293 #end if
294 #end if
295 ]]>
296 </token>
297 <token name="@REGIONS@">
298 #if $section.regions.regions_src == 'regions' and $section.regions.regions != '':
299 --regions '$section.regions.regions'
300 #elif $section.regions.regions_src == 'regions_file' and $section.regions.regions_file:
301 #if $regions_path is not None:
302 --regions-file '$regions_path'
303 #else:
304 --regions-file '$section.regions.regions_file'
305 #end if
306 #end if
307 </token>
308 <xml name="macro_targets_file">
309 <param name="targets_file" type="data" format="tabular" label="Targets File" help="restrict to targets listed in a file" >
310 <yield/>
311 </param>
312 <param name="invert_targets_file" type="boolean" truevalue="^" falsevalue="" label="Invert Targets" help="inverts the query/filtering applied by the target file selection" />
313 </xml>
314 <!-- Compresses and tabix-indexes the selected targets dataset into targets_file.tab.gz and records that name in $targets_path; $targets_path stays None when no targets file is selected. Both branches read the file from $section. --> <token name="@PREPARE_TARGETS_FILE@">
315 <![CDATA[
316 #set $targets_path = None
317 #if 'targets' in $section
318 #if $section.targets.targets_src == 'targets_file':
319 #set $targets_path = 'targets_file.tab.gz'
320 bgzip -c "$section.targets.targets_file" > $targets_path &&
321 tabix -s 1 -b 2 -e 2 $targets_path &&
322 #end if
323 #elif 'targets_file' in $section and $section.targets_file:
324 #set $targets_path = 'targets_file.tab.gz'
325 bgzip -c "$section.targets_file" > $targets_path &&
326 tabix -s 1 -b 2 -e 2 $targets_path &&
327 #end if
328 ]]>
329 </token>
330 <token name="@TARGETS_FILE@">
331 <![CDATA[
332 #if $targets_path is not None:
333 --targets-file "${section.invert_targets_file}${targets_path}"
334 #elif $section.targets_file:
335 --targets-file "${section.invert_targets_file}${section.targets_file}"
336 #end if
337 ]]>
338 </token>
339
340 <!-- Conditional "targets" input: none, a typed comma-separated target list (checked by the regex validator) with an invert switch, or a targets dataset via macro_targets_file. --> <xml name="macro_targets">
341 <conditional name="targets">
342 <param name="targets_src" type="select" label="Targets">
343 <option value="__none__">None</option>
344 <option value="targets">targets</option>
345 <option value="targets_file">targets-file</option>
346 </param>
347 <when value="__none__"/>
348 <when value="targets">
349 <param name="targets" type="text" value="" label="Restrict to comma-separated list of targets" optional="true"
350 help="Each target is specified as: chr or chr:pos or chr:from-to">
351 <validator type="regex" message="Targets must be a comma-separated list, each given as chr, chr:pos or chr:from-to">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
352 </param>
353 <param name="invert_targets_file" type="boolean" truevalue="^" falsevalue="" label="Invert Targets" help="inverts the query/filtering applied by the targets" />
354 </when>
355 <when value="targets_file">
356 <expand macro="macro_targets_file">
357 </expand>
358 </when>
359 </conditional>
360 </xml>
361 <token name="@TARGETS@">
362 <![CDATA[
363 #if $targets_path:
364 --targets-file "${section.targets.invert_targets_file}${targets_path}"
365 #else:
366 #if $section.targets.targets_src == 'targets' and $section.targets.targets != '':
367 --targets '${section.targets.invert_targets_file}${section.targets.targets}'
368 #elif $section.targets.targets_src == 'targets_file' and $section.targets.targets_file:
369 --targets-file "${section.targets.invert_targets_file}${section.targets.targets_file}"
370 #end if
371 #end if
372 ]]>
373 </token>
374
375 <!-- Sample selection inputs: a typed comma-separated sample list and/or a samples dataset, each with its own invert switch that supplies the "^" prefix. --> <xml name="macro_samples">
376 <param name="samples" type="text" value="" label="Samples" optional="true"
377 help="(-s) comma separated list of samples to annotate (or exclude)">
378 <validator type="regex" message="Sample names separated by commas (letters, digits and underscores only)">^(\w+(,\w+)*)?$</validator>
379 </param>
380 <param name="invert_samples" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples"
381 help="inverts the query/filtering applied by Samples (adds &quot;^&quot; prefix to exclude)" />
382 <param name="samples_file" type="data" format="tabular" label="Samples File" optional="True"
383 help="(-S) file of samples to include" />
384 <param name="invert_samples_file" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples File"
385 help="inverts the query/filtering applied by Samples File" />
386 </xml>
387 <token name="@SAMPLES@">
388 #set $samples_defined = False
389 #if str($section.samples) != '':
390 #set $samples_defined = True
391 --samples '${section.invert_samples}${section.samples}'
392 #end if
393 #if $section.samples_file:
394 #set $samples_defined = True
395 --samples-file "${section.invert_samples_file}${section.samples_file}"
396 #end if
397 </token>
398
399 <xml name="macro_sample">
400 <param name="sample" type="text" label="Sample" optional="True" help="apply variants of the given sample" />
401 </xml>
402 <token name="@SAMPLE@">
403 #if $section.sample:
404 --sample '${section.sample}'
405 #end if
406 </token>
407
408
409 <xml name="macro_include">
410 <param name="include" type="text" label="Include" optional="True" help="(-i) select sites for which the expression is true">
411 <validator type="regex" message="Single quote not allowed">^[^']*$</validator>
412 <sanitizer sanitize="False"/>
413 </param>
414 </xml>
415 <token name="@INCLUDE@">
416 #if $section.include:
417 --include '${section.include}'
418 #end if
419 </token>
420
421 <xml name="macro_exclude">
422 <param name="exclude" type="text" label="Exclude" optional="True" help="(-e) exclude sites for which the expression is true">
423 <validator type="regex" message="Single quote not allowed">^[^']*$</validator>
424 <sanitizer sanitize="False"/>
425 </param>
426 </xml>
427 <token name="@EXCLUDE@">
428 #if $section.exclude:
429 --exclude '${section.exclude}'
430 #end if
431 </token>
432
433 <xml name="macro_columns">
434 <param name="columns" type="text" value="" label="Columns" optional="true"
435 help="list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details">
436 <validator type="regex" message="COLUMN names separated by commas">^([^,]+(,[^,]+)*)?$</validator>
437 </param>
438 </xml>
439 <token name="@COLUMNS@">
440 #if $section.columns != '':
441 --columns '${section.columns}'
442 #end if
443 </token>
444
445 <xml name="macro_haploid2diploid">
446 <param name="haploid2diploid" type="boolean" truevalue="--haploid2diploid" falsevalue="" label="Haploid2Diploid" help="convert haploid genotypes to diploid homozygotes" />
447 </xml>
448
449 <xml name="macro_vcf_ids">
450 <param name="vcf_ids" type="boolean" truevalue="--vcf-ids" falsevalue="" label="Vcf Ids" help="output VCF IDs instead of CHROM:POS_REF_ALT" />
451 </xml>
452 <token name="@VCF_IDS@">
453 ${section.vcf_ids}
454 </token>
455
456 <token name="@OUTPUT_HELP@">
457 <![CDATA[
458 Output Type
459 -----------
460
461 Output compressed BCF (b), or uncompressed VCF (v).
462 Use the BCF option when piping between bcftools subcommands to speed up
463 performance by removing unnecessary compression/decompression
464 and VCF<->BCF conversion.
465
466 This Galaxy tool recommends using the compressed BCF format
467 as piping is not implemented, and uncompressed data would
468 use unnecessary amounts of space.
469 ]]></token>
470 <token name="@REGIONS_HELP@">
471 <![CDATA[
472 Region Selections
473 -----------------
474
475 Regions can be specified in a VCF,
476 BED, or tab-delimited file (the default). The columns of the
477 tab-delimited file are: CHROM, POS, and, optionally, POS_TO,
478 where positions are 1-based and inclusive. Uncompressed
479 files are stored in memory, while bgzip-compressed and
480 tabix-indexed region files are streamed. Note that sequence
481 names must match exactly, "chr20" is not the same as "20".
482 Also note that chromosome ordering in FILE will be
483 respected, the VCF will be processed in the order in which
484 chromosomes first appear in FILE. However, within
485 chromosomes, the VCF will always be processed in ascending
486 genomic coordinate order no matter what order they appear in
487 FILE. Note that overlapping regions in FILE can result in
488 duplicated out of order positions in the output. This option
489 requires indexed VCF/BCF files.
490 ]]></token>
491 <token name="@TARGETS_HELP@"><![CDATA[
492 Targets
493 -------
494
495 Similar to regions, but the next position is accessed by streaming the whole
496 VCF/BCF rather than using the tbi/csi index. Both regions and targets options can be
497 applied simultaneously: regions uses the index to jump to a region and targets discards
498 positions which are not in the targets. Unlike regions, targets can be prefixed with
499 "^" to request logical complement. For example, "^X,Y,MT" indicates that
500 sequences X, Y and MT should be skipped. Yet another difference between the two
501 is that regions checks both start and end positions of indels, whereas targets checks
502 start positions only.
503
504 For the bcftools call command, with the option -C alleles, third column of the
505 targets file must be comma-separated list of alleles, starting with the
506 reference allele. Note that the file must be compressed and indexed. Such a file
507 can be easily created from a VCF using::
508
509 bcftools query -f'%CHROM\t%POS\t%REF,%ALT\n' file.vcf | bgzip -c > als.tsv.gz && tabix -s1 -b2 -e2 als.tsv.gz
510 ]]>
511 <!-- TODO: galaxy-ify -->
512 </token>
513
514
515 <token name="@COLLAPSE_HELP@">
516 Collapse
517 --------
518
519 Controls how to treat records with duplicate positions and defines compatible
520 records across multiple input files. Here by "compatible" we mean records which
521 should be considered as identical by the tools. For example, when performing
522 line intersections, the desire may be to consider as identical all sites with
523 matching positions (bcftools isec -c all), or only sites with matching variant
524 type (bcftools isec -c snps -c indels), or only sites with all alleles
525 identical (bcftools isec -c none).
526
527
528 +------------+----------------------------------------------------------------+
529 | Flag value | Result |
530 +============+================================================================+
531 | none | only records with identical REF and ALT alleles are compatible |
532 +------------+----------------------------------------------------------------+
533 | some | only records where some subset of ALT alleles match are |
534 | | compatible |
535 +------------+----------------------------------------------------------------+
536 | all | all records are compatible, regardless of whether the ALT |
537 | | alleles match or not. In the case of records with the same |
538 | | position, only the first will be considered and appear on |
539 | | output. |
540 +------------+----------------------------------------------------------------+
541 | snps | any SNP records are compatible, regardless of whether the ALT |
542 | | alleles match or not. For duplicate positions, only the first |
543 | | SNP record will be considered and appear on output. |
544 +------------+----------------------------------------------------------------+
545 | indels | all indel records are compatible, regardless of whether the |
546 | | REF and ALT alleles match or not. For duplicate positions, |
547 | | only the first indel record will be considered and appear on |
548 | | output. |
549 +------------+----------------------------------------------------------------+
550 | both | abbreviation of "-c indels -c snps" |
551 +------------+----------------------------------------------------------------+
552 | id | only records with identical ID column are compatible. |
553 | | Supported by bcftools merge only. |
554 +------------+----------------------------------------------------------------+
555 </token>
556
557 <token name="@EXPRESSIONS_HELP@">
558 <![CDATA[
559 Expressions
560 -----------
561
562 Valid expressions may contain:
563
564 - numerical constants, string constants
565
566 ::
567
568 1, 1.0, 1e-4
569 "String"
570
571 - arithmetic operators
572
573 ::
574
575 +,*,-,/
576
577 - comparison operators
578
579 ::
580
581 == (same as =), >, >=, <=, <, !=
582
583 - regex operators "~" and its negation "!~"
584
585 ::
586
587 INFO/HAYSTACK ~ "needle"
588
589 - parentheses
590
591 ::
592
593 (, )
594
595 - logical operators
596
597 ::
598
599 && (same as &), ||, |
600
601 - INFO tags, FORMAT tags, column names
602
603 ::
604
605 INFO/DP or DP
606 FORMAT/DV, FMT/DV, or DV
607 FILTER, QUAL, ID, REF, ALT[0]
608
609 - 1 (or 0) to test the presence (or absence) of a flag
610
611 ::
612
613 FlagA=1 && FlagB=0
614
615 - "." to test missing values
616
617 ::
618
619 DP=".", DP!=".", ALT="."
620
621 - missing genotypes can be matched regardless of phase and ploidy (".|.", "./.", ".") using this expression
622
623 ::
624
625 GT="."
626
627 - TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,other)
628
629 ::
630
631 TYPE="indel" | TYPE="snp"
632
633 - array subscripts, "*" for any field
634
635 ::
636
637 (DP4[0]+DP4[1])/(DP4[2]+DP4[3]) > 0.3
638 DP4[*] == 0
639 CSQ[*] ~ "missense_variant.*deleterious"
640
641 - function on FORMAT tags (over samples) and INFO tags (over vector fields)
642
643 ::
644
645 MAX, MIN, AVG, SUM, STRLEN, ABS
646
647 - variables calculated on the fly if not present: number of alternate alleles; number of samples; count of alternate alleles; minor allele count (similar to AC but is always smaller than 0.5); frequency of alternate alleles (AF=AC/AN); frequency of minor alleles (MAF=MAC/AN); number of alleles in called genotypes
648
649 ::
650
651 N_ALT, N_SAMPLES, AC, MAC, AF, MAF, AN
652
653 **Notes:**
654
655 - String comparisons and regular expressions are case-insensitive
656 - If the subscript "*" is used in regular expression search, the whole field
657 is treated as one string. For example, the regex ``STR[*]~"B,C"`` will be
658 true for the string vector INFO/STR=AB,CD.
659 - Variables and function names are case-insensitive, but not tag names. For
660 example, "qual" can be used instead of "QUAL", "strlen()" instead of
661 "STRLEN()" , but not "dp" instead of "DP".
662
663 **Examples:**
664
665 ::
666
667 MIN(DV)>5
668 MIN(DV/DP)>0.3
669 MIN(DP)>10 & MIN(DV)>3
670 FMT/DP>10 & FMT/GQ>10 .. both conditions must be satisfied within one sample
671 FMT/DP>10 && FMT/GQ>10 .. the conditions can be satisfied in different samples
672 QUAL>10 | FMT/GQ>10 .. selects only GQ>10 samples
673 QUAL>10 || FMT/GQ>10 .. selects all samples at QUAL>10 sites
674 TYPE="snp" && QUAL>=10 && (DP4[2]+DP4[3] > 2)
675 MIN(DP)>35 && AVG(GQ)>50
676 ID=@file .. selects lines with ID present in the file
677 ID!=@~/file .. skip lines with ID present in the ~/file
678 MAF[0]<0.05 .. select rare variants at 5% cutoff
679 ]]></token>
680 </macros>