comparison macros.xml @ 0:b068ef999550 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit ef90c4602bdb83ea7455946c9d175ea27284e643
author iuc
date Wed, 06 Jul 2016 07:02:49 -0400
parents
children b8926b599d45
comparison
equal deleted inserted replaced
-1:000000000000 0:b068ef999550
1
2 <macros>
<!-- Version string exposed to tools through the @VERSION@ token. -->
<token name="@VERSION@">1.3</token>
<!-- Shared stdio rules: exit codes of 1 and above or -1 and below, plus the
     text patterns "Error:" and "Exception:", are matched. -->
<xml name="stdio">
    <stdio>
        <exit_code range=":-1" />
        <exit_code range="1:" />
        <regex match="Error:" />
        <regex match="Exception:" />
    </stdio>
</xml>
<!-- Package requirements shared by the tools that expand this macro. -->
<xml name="requirements">
    <requirements>
        <requirement version="1.3" type="package">bcftools</requirement>
        <!-- conda dependency -->
        <requirement version="1.3" type="package">htslib</requirement>
        <requirement version="0.2.6" type="package">tabix</requirement>
        <requirement version="1.2" type="package">samtools</requirement>
    </requirements>
</xml>
<!-- Version probe: runs bcftools with no arguments, merges stderr into stdout
     and keeps only the line containing "Version:". -->
<xml name="version_command">
    <version_command>bcftools 2&gt;&amp;1 | grep 'Version:'</version_command>
</xml>
24
<!-- Common citation block; the yield lets an expanding tool append further
     citation elements after the shared DOI. -->
<xml name="citations">
    <citations>
        <citation type="doi">10.1093/bioinformatics/btp352</citation>
        <yield />
    </citations>
</xml>
<!-- Upstream documentation links. -->
<token name="@BCFTOOLS_WIKI@">https://github.com/samtools/bcftools/wiki</token>
<token name="@BCFTOOLS_MANPAGE@">http://samtools.github.io/bcftools/bcftools.html</token>
<!-- Thread-count option: takes the GALAXY_SLOTS shell variable and falls back
     to 4 when it is unset or empty. The backslash keeps Cheetah from
     interpreting the dollar sign. -->
<token name="@THREADS@">
--threads \${GALAXY_SLOTS:-4}
</token>
<token name="@PREPARE_ENV@">
<![CDATA[
## Derive the plugin directory from the location of the bcftools executable:
## the bin/bcftools part of the path is rewritten to libexec/bcftools.
export BCFTOOLS_PLUGINS=`which bcftools | sed 's,bin/bcftools,libexec/bcftools,'`;
]]>
</token>
<!-- Single VCF/BCF input dataset. -->
<xml name="macro_input">
    <param name="input_file" type="data" format="vcf,vcf_bgzip,bcf,bcf_bgzip" label="VCF/BCF Data" />
</xml>
<token name="@PREPARE_INPUT_FILE@">
<![CDATA[
## Stage the input in the working directory under a name that bcftools can
## pair with an index file. Sets $input_vcf, which @INPUT_FILE@ emits later.
## Every shell command ends with "&&" so that whatever command follows this
## token is chained correctly, whichever branch was taken.
#set $input_vcf = 'input.vcf.gz'
#if $input_file.datatype.file_ext == 'vcf'
  ## Plain VCF: compress, then index.
  bgzip -c "$input_file" > $input_vcf &&
  bcftools index $input_vcf &&
#elif $input_file.datatype.file_ext == 'vcf_bgzip'
  ln -s "$input_file" $input_vcf &&
#elif $input_file.datatype.file_ext == 'bcf'
  #set $input_vcf = 'input.bcf'
  ln -s "$input_file" $input_vcf &&
  #if $input_file.metadata.bcf_index:
    ## Reuse the index stored in the dataset metadata.
    ln -s "${input_file.metadata.bcf_index}" ${input_vcf}.csi &&
  #else
    bcftools index $input_vcf &&
  #end if
#elif $input_file.datatype.file_ext == 'bcf_bgzip'
  ln -s "$input_file" $input_vcf &&
#end if
]]>
</token>
<!-- Emits the staged input name set by @PREPARE_INPUT_FILE@. -->
<token name="@INPUT_FILE@">
$input_vcf
</token>
69
<!-- Multiple VCF/BCF input datasets. -->
<xml name="macro_inputs">
    <param name="input_files" type="data" format="vcf,bcf" label="Other VCF/BCF Datasets" multiple="True" />
</xml>
<token name="@PREPARE_INPUT_FILES@">
<![CDATA[
## Stage every selected dataset in the working directory as input<N>.<ext>.
## Collects the staged names in $input_vcfs (emitted by @INPUT_FILES@) and
## appends them, one per line, to the file named by $vcfs_list_file
## (emitted by @INPUT_LIST_FILE@).
#set $input_vcfs = []
#set $vcfs_list_file = 'vcfs_list'
#for (i,input_file) in enumerate($input_files):
  #set $input_vcf = 'input' + str($i) + '.vcf.gz'
  #if $input_file.datatype.file_ext == 'vcf'
    ## Plain VCF: compress, then index.
    bgzip -c "$input_file" > $input_vcf &&
    bcftools index $input_vcf &&
  #elif $input_file.datatype.file_ext == 'vcf_bgzip'
    ln -s "$input_file" $input_vcf &&
  #elif $input_file.datatype.file_ext == 'bcf'
    #set $input_vcf = 'input' + str($i) + '.bcf.gz'
    ## bgzip -c "$input_file" > $input_vcf &&
    ln -s "$input_file" $input_vcf &&
    #if $input_file.metadata.bcf_index:
      ## Reuse the index stored in the dataset metadata.
      ln -s "${input_file.metadata.bcf_index}" ${input_vcf}.csi &&
    #else
      bcftools index $input_vcf &&
    #end if
  #elif $input_file.datatype.file_ext == 'bcf_bgzip'
    ln -s "$input_file" $input_vcf &&
  #end if
  ## Record the name only once it is final: the BCF branch above renames it.
  echo '$input_vcf' >> $vcfs_list_file &&
  $input_vcfs.append($input_vcf)
#end for
]]>
</token>
<!-- Space-separated staged names collected by @PREPARE_INPUT_FILES@. -->
<token name="@INPUT_FILES@">
#echo ' '.join($input_vcfs)#
</token>
<!-- Name of the list file written by @PREPARE_INPUT_FILES@. -->
<token name="@INPUT_LIST_FILE@">
$vcfs_list_file
</token>
108
<!-- Optional reference sequence taken from the history. -->
<xml name="macro_fasta_ref">
    <param name="fasta_ref" type="data" format="data" label="Fasta Ref" optional="True" help="reference sequence in fasta format" />
</xml>
<token name="@PREPARE_FASTA_REF@">
<![CDATA[
## When the current $section has a fasta_ref dataset, link it into the working
## directory as ref.fa and build its faidx index. $input_fa_ref stays None
## otherwise, and @FASTA_REF@ checks for that.
#set $input_fa_ref = None
#if 'fasta_ref' in $section and $section.fasta_ref:
  #set $input_fa_ref = 'ref.fa'
  ## The source path is quoted like every other dataset path in this file.
  ln -s "${section.fasta_ref}" $input_fa_ref &&
  samtools faidx $input_fa_ref &&
#end if
]]>
</token>
<token name="@FASTA_REF@">
## Prefer the staged copy; fall back to the dataset path itself.
#if $input_fa_ref is not None:
  --fasta-ref "$input_fa_ref"
#elif 'fasta_ref' in $section and $section.fasta_ref:
  --fasta-ref "${section.fasta_ref}"
#end if
</token>
129
<!-- Reference genome chooser: either an entry of the fasta_indexes data table
     or a fasta dataset from the history. Both branches name the parameter
     ref_file. -->
<xml name="macro_ref_fasta">
    <conditional name="reference_source">
        <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
            <option value="cached">Locally cached</option>
            <option value="history">History</option>
        </param>
        <when value="cached">
            <param name="ref_file" type="select" label="Select reference genome">
                <options from_data_table="fasta_indexes">
                    <!--<filter type="data_meta" key="dbkey" ref="input_bam" column="value"/>-->
                </options>
                <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
            </param>
        </when>
        <when value="history"> <!-- FIX ME!!!! -->
            <param name="ref_file" type="data" format="fasta" label="Using reference file" />
        </when>
    </conditional>
</xml>
149
150
<!-- Optional allele-frequency file. -->
<xml name="macro_AF_file">
    <param name="AF_file" type="data" format="data" optional="True" label="Af File" help="read allele frequencies from file (CHR\tPOS\tREF,ALT\tAF)" />
</xml>
<!-- This may need to bgzip and tabix the file -->
<token name="@PREPARE_AF_FILE@">
<![CDATA[
## Placeholder: no staging commands are emitted for the AF file yet.
#if 'AF_file' in $section and $section.AF_file:
  #pass
#end if
]]>
</token>
<token name="@AF_FILE@">
## Emitted only when the current $section carries an AF_file dataset.
#if 'AF_file' in $section and $section.AF_file:
  --AF-file "${section.AF_file}"
#end if
</token>
167
<!-- Optional dataset passed to the estimate-AF option. -->
<xml name="macro_estimate_AF">
    <param name="estimate_AF" type="data" format="data" optional="True" label="Estimate Af" help="calculate AC,AN counts on the fly, using either all samples (&quot;-&quot;) or samples listed in &lt;file&gt;" />
</xml>
<token name="@ESTIMATE_AF@">
## Emitted only when the current $section carries an estimate_AF dataset.
#if 'estimate_AF' in $section and $section.estimate_AF:
  --estimate-AF "${section.estimate_AF}"
#end if
</token>
176
<!-- Optional tab-delimited exons file. -->
<xml name="macro_exons_file">
    <param name="exons_file" type="data" format="tabular" optional="True" label="exons file" help="tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed)" />
</xml>
<token name="@PREPARE_EXONS_FILE@">
<![CDATA[
## Compress the exons dataset into the working directory and tabix-index it
## with column 1 as sequence, column 2 as start and column 3 as end.
## $exons_path stays None when no exons dataset was supplied.
#set $exons_path = None
#if 'exons_file' in $section and $section.exons_file:
  #set $exons_path = 'exons_file.tab.gz'
  bgzip -c "$section.exons_file" > $exons_path &&
  tabix -s 1 -b 2 -e 3 $exons_path &&
#end if
]]>
</token>
<token name="@EXONS_FILE@">
## Points bcftools at the compressed copy made by @PREPARE_EXONS_FILE@.
#if 'exons_file' in $section and $section.exons_file:
  --exons $exons_path
#end if
</token>
195
<!-- Optional tab-delimited ploidy definition file. -->
<xml name="macro_ploidy_file">
    <param name="ploidy_file" type="data" format="tabular" optional="True" label="Ploidy file" help="tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY" />
</xml>
<token name="@PLOIDY_FILE@">
## Emitted only when the current $section carries a ploidy_file dataset.
#if 'ploidy_file' in $section and $section.ploidy_file:
  --ploidy "${section.ploidy_file}"
#end if
</token>
204
<!-- Extra collapse choices that a tool can place inside macro_collapse
     through its yield. -->
<xml name="macro_collapse_opt_none">
    <option value="none">none - require the exact same set of alleles in all files</option>
</xml>
<xml name="macro_collapse_opt_id">
    <option value="id">id - only records with identical ID column are compatible. </option>
</xml>
<!-- Collapse selector with the choices common to all tools; further options
     are added where the yield sits. -->
<xml name="macro_collapse">
    <param name="collapse" type="select" optional="True" label="Collapse" help="Controls how to treat records with duplicate positions and defines compatible records across multiple input files">
        <option value="snps">snps - allow different alleles, as long as they all are SNPs</option>
        <option value="indels">indels - allow different alleles, as long as they all are indels</option>
        <option value="both">both - indels and snps </option>
        <option value="some">some - at least some of the ALTs must match</option>
        <option value="any">any - any combination of alleles</option>
        <yield/>
    </param>
</xml>
<token name="@COLLAPSE@">
## Emitted only when a collapse mode was selected.
#if $section.collapse:
  --collapse "${section.collapse}"
#end if
</token>
226
<!-- Free-text list of FILTER terms; the validator accepts an empty value or
     comma-separated terms containing neither whitespace nor commas. -->
<xml name="macro_apply_filters">
    <param name="apply_filters" type="text" value="" optional="true" label="Apply Filters"
           help="(-f --apply-filters) Skip sites where FILTER column does not contain any of the strings listed (e.g. &quot;PASS,.&quot;)">
        <validator type="regex" message="FILTER terms separated by commas">^([^ \t\n\r\f\v,]+(,[^ \t\n\r\f\v,]+)*)?$</validator>
    </param>
</xml>
<token name="@APPLY_FILTERS@">
## Emitted only when the field is non-empty.
#if $section.apply_filters:
  --apply-filters "${section.apply_filters}"
#end if
</token>
238
<!-- Output format selector; only the two formats with a matching Galaxy
     datatype are offered. -->
<xml name="macro_select_output_type">
    <param name="output_type" type="select">
        <option value="b">compressed BCF</option>
        <!-- no galaxy datatypes for these
        <option value="u">uncompressed BCF</option>
        <option value="z">compressed VCF</option>
        -->
        <option value="v">uncompressed VCF</option>
    </param>
</xml>
<token name="@OUTPUT_TYPE@">
## Skipped when the parameter holds the "__none__" placeholder value.
#if str($output_type) != "__none__":
  --output-type "${output_type}"
#end if
</token>
254
<!-- Output dataset: declared as vcf, with the format switched according to
     the value of the output_type parameter. -->
<xml name="macro_vcf_output">
    <data name="output_file" format="vcf">
        <change_format>
            <when input="output_type" value="b" format="bcf" />
            <when input="output_type" value="u" format="bcf" />
            <when input="output_type" value="z" format="vcf_bgzip" />
            <when input="output_type" value="v" format="vcf" />
        </change_format>
    </data>
</xml>
265
<!-- Region restriction: none, a typed comma-separated list, or a file.
     The list validator accepts an empty value or items shaped like
     chr, chr:pos or chr:from-to. -->
<xml name="macro_regions">
    <conditional name="regions">
        <param name="regions_src" type="select" label="Regions">
            <option value="__none__">None</option>
            <option value="regions">regions</option>
            <option value="regions_file">regions-file</option>
        </param>
        <when value="__none__"/>
        <when value="regions">
            <param name="regions" type="text" value="" label="restrict to comma-separated list of regions" optional="true"
                   help="Each region is specified as: chr or chr:pos or chr:from-to">
                <validator type="regex" message="">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
            </param>
        </when>
        <when value="regions_file">
            <param name="regions_file" type="data" format="vcf,bed,tabular" label="Regions File" optional="True" help="restrict to regions listed in a file" />
        </when>
    </conditional>
</xml>
<token name="@REGIONS@">
## Emits at most one option, chosen by the selected source; nothing is
## emitted when the chosen source is empty.
#if $section.regions.regions_src == 'regions' and $section.regions.regions != '':
  --regions "$section.regions.regions"
#elif $section.regions.regions_src == 'regions_file' and $section.regions.regions_file:
  --regions-file "$section.regions.regions_file"
#end if
</token>
292
<!-- Targets-file parameter plus an inversion switch whose true value is the
     "^" prefix. The yield lets an expanding tool add child elements to the
     data param. -->
<xml name="macro_targets_file">
    <param name="targets_file" type="data" format="tabular" label="Targets File" help="restrict to targets listed in a file" >
        <yield/>
    </param>
    <param name="invert_targets_file" type="boolean" truevalue="^" falsevalue="" label="Invert Targets" help="inverts the query/filtering applied by the target file selection" />
</xml>
<token name="@PREPARE_TARGETS_FILE@">
<![CDATA[
## Compress the targets dataset into the working directory and tabix-index it
## with column 1 as sequence and column 2 as both start and end.
## Two layouts are handled: a "targets" conditional inside $section (see
## macro_targets) or a targets_file param placed directly in $section.
## $targets_path stays None when no targets file applies.
#set $targets_path = None
#if 'targets' in $section
  #if $section.targets.targets_src == 'targets_file':
    #set $targets_path = 'targets_file.tab.gz'
    bgzip -c "$section.targets.targets_file" > $targets_path &&
    tabix -s 1 -b 2 -e 2 $targets_path &&
  #end if
## The test reads the same $section object as the commands below. It used to
## reference $tgts_sec, a name that is not defined anywhere in this file.
#elif 'targets_file' in $section and $section.targets_file:
  #set $targets_path = 'targets_file.tab.gz'
  bgzip -c "$section.targets_file" > $targets_path &&
  tabix -s 1 -b 2 -e 2 $targets_path &&
#end if
]]>
</token>
<token name="@TARGETS_FILE@">
<![CDATA[
## Prefer the compressed copy; the inversion prefix goes inside the quotes.
#if $targets_path is not None:
  --targets-file "${section.invert_targets_file}${targets_path}"
#elif $section.targets_file:
  --targets-file "${section.invert_targets_file}${section.targets_file}"
#end if
]]>
</token>
324
<!-- Target restriction: none, a typed comma-separated list, or a file
     (delegated to macro_targets_file). Both non-empty branches provide an
     invert_targets_file switch. -->
<xml name="macro_targets">
    <conditional name="targets">
        <param name="targets_src" type="select" label="Targets">
            <option value="__none__">None</option>
            <option value="targets">targets</option>
            <option value="targets_file">targets-file</option>
        </param>
        <when value="__none__"/>
        <when value="targets">
            <param name="targets" type="text" value="" label="Restrict to comma-separated list of targets" optional="true"
                   help="Each target is specified as: chr or chr:pos or chr:from-to">
                <validator type="regex" message="">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
            </param>
            <param name="invert_targets_file" type="boolean" truevalue="^" falsevalue="" label="Invert Targets" help="inverts the query/filtering applied by the targets" />
        </when>
        <when value="targets_file">
            <expand macro="macro_targets_file">
                <optional>true</optional>
            </expand>
        </when>
    </conditional>
</xml>
<token name="@TARGETS@">
<![CDATA[
## A compressed copy prepared earlier ($targets_path) takes precedence;
## otherwise the option follows the selected source, and nothing is emitted
## when that source is empty.
#if $targets_path:
  --targets-file "${section.targets.invert_targets_file}${targets_path}"
#else:
  #if $section.targets.targets_src == 'targets' and $section.targets.targets != '':
    --targets "${section.targets.invert_targets_file}${section.targets.targets}"
  #elif $section.targets.targets_src == 'targets_file' and $section.targets.targets_file:
    --targets-file "${section.targets.invert_targets_file}${section.targets.targets_file}"
  #end if
#end if
]]>
</token>
360
<!-- Sample selection by typed list and/or by file, each with its own
     inversion switch whose true value is the "^" prefix. -->
<xml name="macro_samples">
    <param name="samples" type="text" value="" optional="true" label="Samples"
           help="(-s) comma separated list of samples to annotate (or exclude with &quot;^&quot; prefix)">
        <validator type="regex" message="">^(\w+(,\w+)*)?$</validator>
    </param>
    <param name="invert_samples" type="boolean" checked="false" truevalue="^" falsevalue="" label="Invert Samples"
           help="inverts the query/filtering applied by Samples" />
    <param name="samples_file" type="data" format="tabular" optional="True" label="Samples File"
           help="(-S) file of samples to include" />
    <param name="invert_samples_file" type="boolean" checked="false" truevalue="^" falsevalue="" label="Invert Samples File"
           help="inverts the query/filtering applied by Samples File" />
</xml>
<token name="@SAMPLES@">
## $samples_defined ends up True when either sample option was emitted.
#set $samples_defined = False
#if str($section.samples) != '':
  #set $samples_defined = True
  --samples "${section.invert_samples}${section.samples}"
#end if
#if $section.samples_file:
  #set $samples_defined = True
  --samples-file "${section.invert_samples_file}${section.samples_file}"
#end if
</token>
384
<!-- Optional single sample name. -->
<xml name="macro_sample">
    <param name="sample" type="text" optional="True" label="Sample" help="apply variants of the given sample" />
</xml>
<token name="@SAMPLE@">
## Emitted only when a sample name was entered.
#if $section.sample:
  --sample "${section.sample}"
#end if
</token>
393
394
<!-- Include expression. Sanitizing is switched off so the expression reaches
     the command unchanged; the validator rejects single quotes because the
     token wraps the value in single quotes. -->
<xml name="macro_include">
    <param name="include" type="text" optional="True" label="Include" help="(-i) select sites for which the expression is true">
        <validator type="regex" message="Single quote not allowed">^[^']*$</validator>
        <sanitizer sanitize="False"/>
    </param>
</xml>
<token name="@INCLUDE@">
## Emitted only when an expression was entered.
#if $section.include:
  --include '${section.include}'
#end if
</token>

<!-- Exclude expression; same quoting rules as the include expression. -->
<xml name="macro_exclude">
    <param name="exclude" type="text" optional="True" label="Exclude" help="(-e) exclude sites for which the expression is true">
        <validator type="regex" message="Single quote not allowed">^[^']*$</validator>
        <sanitizer sanitize="False"/>
    </param>
</xml>
<token name="@EXCLUDE@">
## Emitted only when an expression was entered.
#if $section.exclude:
  --exclude '${section.exclude}'
#end if
</token>
418
<!-- Column list for the annotation file; the validator accepts an empty
     value or comma-separated non-empty names. -->
<xml name="macro_columns">
    <param name="columns" type="text" value="" optional="true" label="Columns"
           help="list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details">
        <validator type="regex" message="COLUMN names separated by commas">^([^,]+(,[^,]+)*)?$</validator>
    </param>
</xml>
<token name="@COLUMNS@">
## Emitted only when the field is non-empty.
#if $section.columns != '':
  --columns "${section.columns}"
#end if
</token>
430
<!-- Boolean switches whose true value is the command-line flag itself. -->
<xml name="macro_haploid2diploid">
    <param name="haploid2diploid" type="boolean" falsevalue="" truevalue="--haploid2diploid" label="Haploid2Diploid" help="convert haploid genotypes to diploid homozygotes" />
</xml>

<xml name="macro_vcf_ids">
    <param name="vcf_ids" type="boolean" falsevalue="" truevalue="--vcf-ids" label="Vcf Ids" help="output VCF IDs instead of CHROM:POS_REF_ALT" />
</xml>
<!-- Emits the value of the vcf_ids switch from the current section. -->
<token name="@VCF_IDS@">
${section.vcf_ids}
</token>
441
442 <token name="@OUTPUT_HELP@">
443 <![CDATA[
444 Output Type
445 -----------
446
447 Output compressed BCF (b), or uncompressed VCF (v).
448 Use the BCF option when piping between bcftools subcommands to speed up
449 performance by removing unnecessary compression/decompression
450 and VCF<->BCF conversion.
451
452 This Galaxy tool recommends using the compressed BCF format
453 as piping is not implemented, and uncompressed data would
454 use unnecessary amounts of space.
455
456 ]]></token>
457 <token name="@REGIONS_HELP@">
458 <![CDATA[
459 Region Selections
460 -----------------
461
462 Regions can be specified in a VCF,
463 BED, or tab-delimited file (the default). The columns of the
464 tab-delimited file are: CHROM, POS, and, optionally, POS_TO,
465 where positions are 1-based and inclusive. Uncompressed
466 files are stored in memory, while bgzip-compressed and
467 tabix-indexed region files are streamed. Note that sequence
468 names must match exactly, "chr20" is not the same as "20".
469 Also note that chromosome ordering in FILE will be
470 respected, the VCF will be processed in the order in which
471 chromosomes first appear in FILE. However, within
472 chromosomes, the VCF will always be processed in ascending
473 genomic coordinate order no matter what order they appear in
474 FILE. Note that overlapping regions in FILE can result in
475 duplicated out of order positions in the output. This option
476 requires indexed VCF/BCF files.
477
478 ]]></token>
479 <token name="@TARGETS_HELP@"><![CDATA[
480 Targets
481 -------
482
483 Similar to regions, but the next position is accessed by streaming the whole
484 VCF/BCF rather than using the tbi/csi index. Both regions and targets options can be
485 applied simultaneously: regions uses the index to jump to a region and targets discards
486 positions which are not in the targets. Unlike regions, targets can be prefixed with
487 "^" to request logical complement. For example, "^X,Y,MT" indicates that
488 sequences X, Y and MT should be skipped. Yet another difference between the two
489 is that regions checks both start and end positions of indels, whereas targets checks
490 start positions only.
491
492 For the bcftools call command, with the option -C alleles, third column of the
493 targets file must be comma-separated list of alleles, starting with the
494 reference allele. Note that the file must be compressed and indexed. Such a file
495 can be easily created from a VCF using::
496
497 bcftools query -f'%CHROM\t%POS\t%REF,%ALT\n' file.vcf | bgzip -c > als.tsv.gz && tabix -s1 -b2 -e2 als.tsv.gz
498 ]]>
499 <!-- TODO: galaxy-ify -->
500 </token>
501
502
503 <token name="@COLLAPSE_HELP@">
504 Collapse
505 --------
506
507 Controls how to treat records with duplicate positions and defines compatible
508 records across multiple input files. Here by "compatible" we mean records which
509 should be considered as identical by the tools. For example, when performing
510 line intersections, the desire may be to consider as identical all sites with
511 matching positions (bcftools isec -c all), or only sites with matching variant
512 type (bcftools isec -c snps -c indels), or only sites with all alleles
513 identical (bcftools isec -c none).
514
515
516 +------------+----------------------------------------------------------------+
517 | Flag value | Result |
518 +============+================================================================+
519 | none | only records with identical REF and ALT alleles are compatible |
520 +------------+----------------------------------------------------------------+
521 | some | only records where some subset of ALT alleles match are |
522 | | compatible |
523 +------------+----------------------------------------------------------------+
524 | all | all records are compatible, regardless of whether the ALT |
525 | | alleles match or not. In the case of records with the same |
526 | | position, only the first will be considered and appear on |
527 | | output. |
528 +------------+----------------------------------------------------------------+
529 | snps | any SNP records are compatible, regardless of whether the ALT |
530 | | alleles match or not. For duplicate positions, only the first |
531 | | SNP record will be considered and appear on output. |
532 +------------+----------------------------------------------------------------+
533 | indels | all indel records are compatible, regardless of whether the |
534 | | REF and ALT alleles match or not. For duplicate positions, |
535 | | only the first indel record will be considered and appear on |
536 | | output. |
537 +------------+----------------------------------------------------------------+
538 | both | abbreviation of "-c indels -c snps" |
539 +------------+----------------------------------------------------------------+
540 | id | only records with identical ID column are compatible. |
541 | | Supported by bcftools merge only. |
542 +------------+----------------------------------------------------------------+
543
544 </token>
545
546 <token name="@EXPRESSIONS_HELP@">
547 <![CDATA[
548 Expressions
549 -----------
550
551 Valid expressions may contain:
552
553 - numerical constants, string constants
554
555 ::
556
557 1, 1.0, 1e-4
558 "String"
559
560 - arithmetic operators
561
562 ::
563
564 +,*,-,/
565
566 - comparison operators
567
568 ::
569
570 == (same as =), >, >=, <=, <, !=
571
572 - regex operators "~" and its negation "!~"
573
574 ::
575
576 INFO/HAYSTACK ~ "needle"
577
578 - parentheses
579
580 ::
581
582 (, )
583
584 - logical operators
585
586 ::
587
588 && (same as &), ||, |
589
590 - INFO tags, FORMAT tags, column names
591
592 ::
593
594 INFO/DP or DP
595 FORMAT/DV, FMT/DV, or DV
596 FILTER, QUAL, ID, REF, ALT[0]
597
598 - 1 (or 0) to test the presence (or absence) of a flag
599
600 ::
601
602 FlagA=1 && FlagB=0
603
604 - "." to test missing values
605
606 ::
607
608 DP=".", DP!=".", ALT="."
609
610 - missing genotypes can be matched regardless of phase and ploidy (".|.", "./.", ".") using this expression
611
612 ::
613
614 GT="."
615
616 - TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,other)
617
618 ::
619
620 TYPE="indel" | TYPE="snp"
621
622 - array subscripts, "*" for any field
623
624 ::
625
626 (DP4[0]+DP4[1])/(DP4[2]+DP4[3]) > 0.3
627 DP4[*] == 0
628 CSQ[*] ~ "missense_variant.*deleterious"
629
630 - function on FORMAT tags (over samples) and INFO tags (over vector fields)
631
632 ::
633
634 MAX, MIN, AVG, SUM, STRLEN, ABS
635
636 - variables calculated on the fly if not present: number of alternate alleles; number of samples; count of alternate alleles; minor allele count (similar to AC but is always smaller than 0.5); frequency of alternate alleles (AF=AC/AN); frequency of minor alleles (MAF=MAC/AN); number of alleles in called genotypes
637
638 ::
639
640 N_ALT, N_SAMPLES, AC, MAC, AF, MAF, AN
641
642 **Notes:**
643
644 - String comparisons and regular expressions are case-insensitive
645 - If the subscript "*" is used in regular expression search, the whole field
646 is treated as one string. For example, the regex ``STR[*]~"B,C"`` will be
647 true for the string vector INFO/STR=AB,CD.
648 - Variables and function names are case-insensitive, but not tag names. For
649 example, "qual" can be used instead of "QUAL", "strlen()" instead of
650 "STRLEN()" , but not "dp" instead of "DP".
651
652 **Examples:**
653
654 ::
655
656 MIN(DV)>5
657 MIN(DV/DP)>0.3
658 MIN(DP)>10 & MIN(DV)>3
659 FMT/DP>10 & FMT/GQ>10 .. both conditions must be satisfied within one sample
660 FMT/DP>10 && FMT/GQ>10 .. the conditions can be satisfied in different samples
661 QUAL>10 | FMT/GQ>10 .. selects only GQ>10 samples
662 QUAL>10 || FMT/GQ>10 .. selects all samples at QUAL>10 sites
663 TYPE="snp" && QUAL>=10 && (DP4[2]+DP4[3] > 2)
664 MIN(DP)>35 && AVG(GQ)>50
665 ID=@file .. selects lines with ID present in the file
666 ID!=@~/file .. skip lines with ID present in the ~/file
667 MAF[0]<0.05 .. select rare variants at 5% cutoff
668
669 ]]></token>
670
671
672
673
674 </macros>