comparison macros.xml @ 0:3b6cd8086498 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit c45135e52ae5039e09272ac6f504d0ceb574aa70
author iuc
date Sat, 23 Jul 2022 13:49:21 +0000
parents
children 70276425d001
comparison
equal deleted inserted replaced
-1:000000000000 0:3b6cd8086498
1 <macros>
2 <token name="@TOOL_VERSION@">1.10</token>
3 <xml name="bio_tools">
4 <xrefs>
5 <xref type="bio.tools">bcftools</xref>
6 </xrefs>
7 </xml>
8 <xml name="requirements">
9 <requirements>
10 <requirement type="package" version="@TOOL_VERSION@">bcftools</requirement>
11 <requirement type="package" version="1.10">htslib</requirement>
12 <yield />
13 </requirements>
14 </xml>
15 <xml name="samtools_requirement">
16 <requirement type="package" version="1.10">samtools</requirement>
17 </xml>
18 <xml name="matplotlib_requirement">
19 <requirement type="package" version="3.4.3">matplotlib</requirement>
20 </xml>
21 <xml name="version_command">
22 <version_command>bcftools 2&gt;&amp;1 | grep 'Version:'</version_command>
23 </xml>
24
25 <xml name="citations">
26 <citations>
27 <citation type="doi">10.1093/bioinformatics/btp352</citation>
28 <yield />
29 </citations>
30 </xml>
31 <token name="@BCFTOOLS_WIKI@">https://github.com/samtools/bcftools/wiki</token>
32 <token name="@BCFTOOLS_MANPAGE@">http://samtools.github.io/bcftools/bcftools.html</token>
33 <token name="@THREADS@">
34 --threads \${GALAXY_SLOTS:-4}
35 </token>
36 <token name="@PREPARE_ENV@">
37 <![CDATA[
38 export BCFTOOLS_PLUGINS=`which bcftools | sed 's,bin/bcftools,libexec/bcftools,'`;
39 ]]>
40 </token>
41 <xml name="macro_input">
42 <param name="input_file" type="data" format="vcf,vcf_bgzip,bcf" label="VCF/BCF Data" />
43 </xml>
44 <token name="@PREPARE_INPUT_FILE@">
45 <![CDATA[
46 ## Stage the input under a fixed local name (input.vcf.gz, or input.bcf for BCF): a plain VCF is bgzip-compressed and indexed; a bgzipped VCF or a BCF is symlinked, and its index is symlinked from the dataset metadata when one is present, otherwise built with bcftools index.
47 #set $input_vcf = 'input.vcf.gz'
48 #if $input_file.is_of_type('vcf')
49 bgzip -c '$input_file' > $input_vcf &&
50 bcftools index $input_vcf &&
51 #elif $input_file.is_of_type('vcf_bgzip')
52 ln -s '$input_file' $input_vcf &&
53 #if $input_file.metadata.tabix_index:
54 ln -s '${input_file.metadata.tabix_index}' ${input_vcf}.tbi &&
55 #else
56 bcftools index $input_vcf &&
57 #end if
58 #elif $input_file.is_of_type('bcf')
59 #set $input_vcf = 'input.bcf'
60 ln -s '$input_file' $input_vcf &&
61 #if $input_file.metadata.bcf_index:
62 ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi &&
63 #else
64 bcftools index $input_vcf &&
65 #end if
66 #end if
67 ]]>
68 </token>
69 <token name="@INPUT_FILE@">
70 $input_vcf
71 </token>
72
73 <xml name="macro_inputs">
74 <param name="input_files" type="data" format="vcf,vcf_bgzip,bcf" label="Other VCF/BCF Datasets" multiple="True" />
75 </xml>
76 <token name="@PREPARE_INPUT_FILES@">
77 <![CDATA[
78 ## Stage every dataset of input_files under a numbered local name (inputN.vcf.gz, or inputN.bcf.gz for BCF): a plain VCF is bgzip-compressed and indexed; a bgzipped VCF or a BCF is symlinked, and its index is symlinked from the dataset metadata when one is present, otherwise built with bcftools index. Each staged name is appended to the vcfs_list file and to the input_vcfs list.
79 #set $input_vcfs = []
80 #set $vcfs_list_file = 'vcfs_list'
81 #for (i, input_file) in enumerate($input_files):
82 #set $input_vcf = 'input' + str($i) + '.vcf.gz'
83 #if $input_file.is_of_type('vcf')
84 bgzip -c '$input_file' > $input_vcf &&
85 bcftools index $input_vcf &&
86 #elif $input_file.is_of_type('vcf_bgzip')
87 ln -s '$input_file' $input_vcf &&
88 #if $input_file.metadata.tabix_index:
89 ln -s '${input_file.metadata.tabix_index}' ${input_vcf}.tbi &&
90 #else
91 bcftools index $input_vcf &&
92 #end if
93 #elif $input_file.is_of_type('bcf')
94 #set $input_vcf = 'input' + str($i) + '.bcf.gz'
95 ln -s '$input_file' $input_vcf &&
96 #if $input_file.metadata.bcf_index:
97 ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi &&
98 #else
99 bcftools index $input_vcf &&
100 #end if
101 #end if
102 echo '$input_vcf' >> $vcfs_list_file &&
103 $input_vcfs.append($input_vcf)
104 #end for
105 ]]>
106 </token>
107 <token name="@INPUT_FILES@">
108 #echo ' '.join($input_vcfs)#
109 </token>
110 <token name="@INPUT_LIST_FILE@">
111 $vcfs_list_file
112 </token>
113
114 <xml name="test_using_reference" token_select_from="history" token_ref="">
115 <conditional name="reference_source">
116 <param name="reference_source_selector" value="@SELECT_FROM@" />
117 <param name="fasta_ref" ftype="fasta" value="@REF@" />
118 </conditional>
119 </xml>
120
121 <xml name="macro_fasta_ref">
122 <conditional name="reference_source">
123 <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
124 <option value="cached">Use a built-in genome</option>
125 <option value="history">Use a genome from the history</option>
126 </param>
127 <when value="cached">
128 <param name="fasta_ref" type="select" label="Reference genome">
129 <options from_data_table="fasta_indexes">
130 <filter type="data_meta" column="dbkey" key="dbkey" ref="input_file" />
131 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" />
132 </options>
133 </param>
134 </when>
135 <when value="history">
136 <param name="fasta_ref" type="data" format="fasta" label="Reference genome" />
137 </when>
138 </conditional>
139 </xml>
140 <token name="@PREPARE_FASTA_REF@">
141 <![CDATA[
142 #set $input_fa_ref = None
143 #if 'fasta_ref' in $section and $section.fasta_ref:
144 #if 'reference_source_selector' in $section:
145 #if str($section.reference_source_selector) == "history":
146 #set $input_fa_ref = 'ref.fa'
147 ln -s '$section.fasta_ref' $input_fa_ref &&
148 samtools faidx $input_fa_ref &&
149 #else:
150 #set $input_fa_ref = str($section.fasta_ref.fields.path)
151 #end if
152 #end if
153 #end if
154 ]]>
155 </token>
156 <token name="@FASTA_REF@">
157 #if $input_fa_ref is not None:
158 --fasta-ref $input_fa_ref
159 #elif 'fasta_ref' in $section and $section.fasta_ref:
160 --fasta-ref '${section.fasta_ref}'
161 #end if
162 </token>
163
164 <xml name="macro_AF_file">
165 <param name="AF_file" argument="--AF-file" type="data" format="tabular" optional="true" label="Allele frequencies file" help="Tab-delimited file containing the columns CHR,POS,REF,ALT,AF" />
166 </xml>
167 <!-- This may need to bgzip and tabix the file -->
168 <token name="@PREPARE_AF_FILE@">
169 <![CDATA[
170 #if 'AF_file' in $section and $section.AF_file:
171 #pass
172 #end if
173 ]]>
174 </token>
175 <token name="@AF_FILE@">
176 #if 'AF_file' in $section and $section.AF_file:
177 --AF-file '${section.AF_file}'
178 #end if
179 </token>
180
181 <xml name="macro_estimate_AF">
182 <param name="estimate_AF" argument="--estimate-AF" type="data" format="data" optional="true" label="Estimate allele frequency" help="Calculate AC,AN counts on the fly, using either all samples (&quot;-&quot;) or samples listed in &lt;file&gt;" />
183 </xml>
184 <token name="@ESTIMATE_AF@">
185 #if 'estimate_AF' in $section and $section.estimate_AF:
186 --estimate-AF "${section.estimate_AF}"
187 #end if
188 </token>
189
190 <xml name="macro_exons_file">
191 <param name="exons_file" type="data" format="tabular" optional="true" label="Exons file" help="Tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed)" />
192 </xml>
193 <token name="@PREPARE_EXONS_FILE@">
194 <![CDATA[
195 #set $exons_path = None
196 #if 'exons_file' in $section and $section.exons_file:
197 #set $exons_path = 'exons_file.tab.gz'
198 bgzip -c "$section.exons_file" > $exons_path &&
199 tabix -s 1 -b 2 -e 3 $exons_path &&
200 #end if
201 ]]>
202 </token>
203 <token name="@EXONS_FILE@">
204 #if 'exons_file' in $section and $section.exons_file:
205 --exons $exons_path
206 #end if
207 </token>
208
209 <xml name="macro_ploidy_file">
210 <param name="ploidy_file" type="data" format="tabular" optional="true" label="Ploidy file" help="Tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY" />
211 </xml>
212 <token name="@PLOIDY_FILE@">
213 #if 'ploidy_file' in $section and $section.ploidy_file:
214 --ploidy "${section.ploidy_file}"
215 #end if
216 </token>
217
218 <xml name="macro_collapse_opt_none">
219 <option value="none">none - require the exact same set of alleles in all files</option>
220 </xml>
221 <xml name="macro_collapse_opt_id">
222 <option value="id">id - only records with identical ID column are compatible. </option>
223 </xml>
224 <xml name="macro_collapse">
225 <param name="collapse" type="select" optional="true" label="Collapse" help="Controls how to treat records with duplicate positions and defines compatible records across multiple input files">
226 <option value="snps">snps - allow different alleles, as long as they all are SNPs</option>
227 <option value="indels">indels - allow different alleles, as long as they all are indels</option>
228 <option value="both">both - indels and snps </option>
229 <option value="some">some - at least some of the ALTs must match</option>
230 <option value="any">any - any combination of alleles</option>
231 <yield/>
232 </param>
233 </xml>
234 <token name="@COLLAPSE@">
235 #if $section.collapse:
236 --collapse ${section.collapse}
237 #end if
238 </token>
239
240 <xml name="macro_apply_filters">
241 <param argument="--apply_filters" type="text" value="" optional="true" label="Apply filters"
242 help="Skip sites where FILTER column does not contain any of the strings listed (e.g. &quot;PASS,.&quot;)">
243 <validator type="regex" message="FILTER terms separated by commas">^([^ \t\n\r\f\v,]+(,[^ \t\n\r\f\v,]+)*)?$</validator>
244 </param>
245 </xml>
246 <token name="@APPLY_FILTERS@">
247 #if $section.apply_filters:
248 --apply-filters '${section.apply_filters}'
249 #end if
250 </token>
251
252 <xml name="macro_select_output_type">
253 <param name="output_type" type="select">
254 <option value="b">compressed BCF</option>
255 <!-- no galaxy datatypes for these
256 <option value="u">uncompressed BCF</option>
257 <option value="z">compressed VCF</option>
258 -->
259 <option value="v">uncompressed VCF</option>
260 </param>
261 </xml>
262 <token name="@OUTPUT_TYPE@">
263 #if str($output_type) != "__none__":
264 --output-type '${output_type}'
265 #end if
266 </token>
267
268 <xml name="macro_vcf_output">
269 <data name="output_file" format="vcf">
270 <change_format>
271 <when input="output_type" value="b" format="bcf" />
272 <when input="output_type" value="u" format="bcf" />
273 <when input="output_type" value="z" format="vcf_bgzip" />
274 <when input="output_type" value="v" format="vcf" />
275 </change_format>
276 </data>
277 </xml>
278
279 <xml name="macro_invert_targets">
280 <param name="invert_targets_file" type="boolean" truevalue="^" falsevalue=""
281 label="Invert Targets"
282 help="inverts the query/filtering applied by the targets" />
283 </xml>
284
285 <xml name="macro_restriction_spec" token_type="region" token_label_type="Region">
286 <repeat name="@TYPE@s" title="@LABEL_TYPE@ Filter" default="1" min="1">
287 <param name="chrom" type="text" label="@LABEL_TYPE@ chromosome">
288 <validator type="expression" message="A chromosome identifier is required when specifying a @LABEL_TYPE@ filter">value.strip()</validator>
289 </param>
290 <param name="start" type="text" label="@LABEL_TYPE@ start position">
291 <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator>
292 </param>
293 <param name="stop" type="text" label="@LABEL_TYPE@ end position">
294 <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator>
295 </param>
296 <yield />
297 </repeat>
298 </xml>
299
300 <xml name="macro_restrictions_file" token_type="region" token_label_type="Region">
301 <param name="@TYPE@s_file" type="data" format="tabular" label="@LABEL_TYPE@s File" help="restrict to @LABEL_TYPE@s listed in a file" />
302 </xml>
303
304 <xml name="macro_restrict" token_type="region" token_label_type="Region" >
305 <conditional name="@TYPE@s">
306 <param name="@TYPE@s_src" type="select" label="@LABEL_TYPE@s">
307 <option value="__none__">Do not restrict to @LABEL_TYPE@s</option>
308 <option value="@TYPE@s">Specify one or more @LABEL_TYPE@(s) directly</option>
309 <option value="@TYPE@s_file">Operate on @LABEL_TYPE@s specified in a history dataset</option>
310 </param>
311 <when value="__none__"/>
312 <when value="@TYPE@s">
313 <expand macro="macro_restriction_spec" type="@TYPE@" label_type="@LABEL_TYPE@" />
314 <yield />
315 </when>
316 <when value="@TYPE@s_file">
317 <expand macro="macro_restrictions_file" type="@TYPE@" label_type="@LABEL_TYPE@" />
318 <yield />
319 </when>
320 </conditional>
321 </xml>
322
323 <token name="@PARSE_INTERVALS@">
324 <![CDATA[
325 #set $components = []
326 #for $i in $intervals:
327 #set $chrom = str($i.chrom).strip()
328 #set $start = str($i.start).strip()
329 #set $stop = str($i.stop).strip()
330 #if $start or $stop:
331 $components.append($chrom + ':' + ($start or '0') + '-' + $stop)
332 #else:
333 $components.append($chrom)
334 #end if
335 #end for
336 #set $intervals_spec = ','.join($components)
337 ]]>
338 </token>
339
340 <token name="@REGIONS@">
341 <![CDATA[
342 #if $section.regions.regions_src == 'regions':
343 #set $intervals = $section.regions.regions
344 @PARSE_INTERVALS@
345 --regions '$intervals_spec'
346 #elif $section.regions.regions_src == 'regions_file' and $section.regions.regions_file:
347 #if $regions_path is not None:
348 --regions-file '$regions_path'
349 #else:
350 --regions-file '$section.regions.regions_file'
351 #end if
352 #end if
353 ]]>
354 </token>
355
356 <token name="@TARGETS@">
357 <![CDATA[
358 #if $targets_path:
359 --targets-file "${section.targets.invert_targets_file}${targets_path}"
360 #elif $section.targets.targets_src == 'targets':
361 #set $intervals = $section.targets.targets
362 @PARSE_INTERVALS@
363 --targets '${section.targets.invert_targets_file}$intervals_spec'
364 #elif $section.targets.targets_src == 'targets_file' and $section.targets.targets_file:
365 --targets-file "${section.targets.invert_targets_file}${section.targets.targets_file}"
366 #end if
367 ]]>
368 </token>
369
370 <token name="@PREPARE_REGIONS_FILE@">
371 <![CDATA[
372 #set $regions_path = None
373 #if 'regions' in $section
374 #if $section.regions.regions_src == 'regions_file' and $section.regions.regions_file:
375 #if $section.regions.regions_file.ext.startswith('bed'):
376 #set $regions_path = 'regions_file.bed'
377 ln -s '$section.regions.regions_file' $regions_path &&
378 #end if
379 #end if
380 #end if
381 ]]>
382 </token>
383
384 <!-- Cheetah snippet: bgzip-compresses and tabix-indexes the selected targets dataset as targets_file.tab.gz and stores that name in $targets_path; $targets_path stays None when no targets file is selected. Reads the targets either from a "targets" conditional inside $section or from a targets_file parameter placed directly in $section. --> <token name="@PREPARE_TARGETS_FILE@">
385 <![CDATA[
386 #set $targets_path = None
387 #if 'targets' in $section
388 #if $section.targets.targets_src == 'targets_file':
389 #set $targets_path = 'targets_file.tab.gz'
390 bgzip -c "$section.targets.targets_file" > $targets_path &&
391 tabix -s 1 -b 2 -e 2 $targets_path &&
392 #end if
393 #elif 'targets_file' in $section and $section.targets_file:
394 #set $targets_path = 'targets_file.tab.gz'
395 bgzip -c "$section.targets_file" > $targets_path &&
396 tabix -s 1 -b 2 -e 2 $targets_path &&
397 #end if
398 ]]>
399 </token>
400
401 <token name="@TARGETS_FILE@">
402 <![CDATA[
403 #if $targets_path is not None:
404 --targets-file "${section.invert_targets_file}${targets_path}"
405 #elif $section.targets_file:
406 --targets-file "${section.invert_targets_file}${section.targets_file}"
407 #end if
408 ]]>
409 </token>
410
411 <!-- Sample selection inputs: a comma separated sample list, an optional samples file, and one "invert" checkbox for each that supplies the "^" prefix used for exclusion. --> <xml name="macro_samples">
412 <param argument="--samples" type="text" value="" optional="true" label="Samples"
413 help="Comma separated list of samples to annotate (or exclude)">
414 <validator type="regex" message="Sample names may only contain word characters and must be separated by commas">^(\w+(,\w+)*)?$</validator>
415 </param>
416 <param name="invert_samples" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples"
417 help="Inverts the query/filtering applied by Samples (adds &quot;^&quot; prefix to exclude)" />
418 <param argument="--samples_file" type="data" format="tabular" optional="true" label="Samples file"
419 help="File of samples to include" />
420 <param name="invert_samples_file" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples file"
421 help="inverts the query/filtering applied by Samples file" />
422 </xml>
423 <token name="@SAMPLES@">
424 #set $samples_defined = False
425 #if str($section.samples) != '':
426 #set $samples_defined = True
427 --samples '${section.invert_samples}${section.samples}'
428 #end if
429 #if $section.samples_file:
430 #set $samples_defined = True
431 --samples-file "${section.invert_samples_file}${section.samples_file}"
432 #end if
433 </token>
434
435 <xml name="macro_sample">
436 <param name="sample" type="text" optional="true" label="Sample" help="Apply variants of the given sample" />
437 </xml>
438 <token name="@SAMPLE@">
439 #if $section.sample:
440 --sample '${section.sample}'
441 #end if
442 </token>
443
444 <!-- Shared validator and sanitizer for the include/exclude filter expressions: rejects single quotes and a trailing backslash, and strips "@" and "'" from the otherwise permitted ASCII letters, digits, whitespace and punctuation. endswith() is used instead of value[-1] so an empty value cannot raise IndexError. --> <xml name="macro_include_exclude_validate_sanitize">
445 <validator type="expression" message="Single quote or trailing backslash not allowed">"'" not in value and not value.endswith("\\")</validator>
446 <sanitizer>
447 <valid initial="string.ascii_letters,string.digits,string.whitespace,string.punctuation">
448 <remove value="@" />
449 <remove value="'" />
450 </valid>
451 </sanitizer>
452 </xml>
453
454 <xml name="macro_include">
455 <param argument="--include" type="text" optional="true" label="Include" help="Select sites for which the expression is true">
456 <expand macro="macro_include_exclude_validate_sanitize" />
457 </param>
458 </xml>
459 <token name="@INCLUDE@">
460 #if $section.include:
461 --include '${section.include}'
462 #end if
463 </token>
464
465 <xml name="macro_exclude">
466 <param argument="--exclude" type="text" optional="true" label="Exclude" help="Exclude sites for which the expression is true">
467 <expand macro="macro_include_exclude_validate_sanitize" />
468 </param>
469 </xml>
470 <token name="@EXCLUDE@">
471 #if $section.exclude:
472 --exclude '${section.exclude}'
473 #end if
474 </token>
475
476 <xml name="macro_columns">
477 <param name="columns" type="text" value="" optional="true" label="Columns"
478 help="List of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details">
479 <validator type="regex" message="COLUMN names separated by commas">^([^,]+(,[^,]+)*)?$</validator>
480 </param>
481 </xml>
482 <token name="@COLUMNS@">
483 #if $section.columns != '':
484 --columns '${section.columns}'
485 #end if
486 </token>
487
488 <xml name="macro_haploid2diploid">
489 <param name="haploid2diploid" type="boolean" truevalue="--haploid2diploid" falsevalue="" label="Haploid2Diploid" help="convert haploid genotypes to diploid homozygotes" />
490 </xml>
491
492 <xml name="macro_vcf_ids">
493 <param name="vcf_ids" type="boolean" truevalue="--vcf-ids" falsevalue="" label="Vcf Ids" help="output VCF IDs instead of CHROM:POS_REF_ALT" />
494 </xml>
495 <token name="@VCF_IDS@">
496 ${section.vcf_ids}
497 </token>
498
499 <token name="@OUTPUT_HELP@">
500 <![CDATA[
501 Output Type
502 -----------
503
504 Output compressed BCF (b), or uncompressed VCF (v).
505 Use the BCF option when piping between bcftools subcommands to speed up
506 performance by removing unnecessary compression/decompression
507 and VCF<->BCF conversion.
508
509 This Galaxy tool recommends using the compressed BCF format
510 as piping is not implemented, and uncompressed data would
511 use unnecessary amounts of space.
512 ]]></token>
513 <token name="@REGIONS_HELP@">
514 <![CDATA[
515 Region Selections
516 -----------------
517
518 Regions can be specified in a VCF,
519 BED, or tab-delimited file (the default). The columns of the
520 tab-delimited file are: CHROM, POS, and, optionally, POS_TO,
521 where positions are 1-based and inclusive. Uncompressed
522 files are stored in memory, while bgzip-compressed and
523 tabix-indexed region files are streamed. Note that sequence
524 names must match exactly, "chr20" is not the same as "20".
525 Also note that chromosome ordering in FILE will be
526 respected, the VCF will be processed in the order in which
527 chromosomes first appear in FILE. However, within
528 chromosomes, the VCF will always be processed in ascending
529 genomic coordinate order no matter what order they appear in
530 FILE. Note that overlapping regions in FILE can result in
531 duplicated out of order positions in the output. This option
532 requires indexed VCF/BCF files.
533 ]]></token>
534 <token name="@TARGETS_HELP@"><![CDATA[
535 Targets
536 -------
537
538 Similar to regions, but the next position is accessed by streaming the whole
539 VCF/BCF rather than using the tbi/csi index. Both regions and targets options can be
540 applied simultaneously: regions uses the index to jump to a region and targets discards
541 positions which are not in the targets. Unlike regions, targets can be prefixed with
542 "^" to request logical complement. For example, "^X,Y,MT" indicates that
543 sequences X, Y and MT should be skipped. Yet another difference between the two
544 is that regions checks both start and end positions of indels, whereas targets checks
545 start positions only.
546
547 For the bcftools call command, with the option -C alleles, third column of the
548 targets file must be comma-separated list of alleles, starting with the
549 reference allele. Note that the file must be compressed and indexed. Such a file
550 can be easily created from a VCF using::
551
552 bcftools query -f'%CHROM\t%POS\t%REF,%ALT\n' file.vcf | bgzip -c > als.tsv.gz && tabix -s1 -b2 -e2 als.tsv.gz
553 ]]>
554 <!-- TODO: galaxy-ify -->
555 </token>
556
557
558 <token name="@COLLAPSE_HELP@">
559 Collapse
560 --------
561
562 Controls how to treat records with duplicate positions and defines compatible
563 records across multiple input files. Here by "compatible" we mean records which
564 should be considered as identical by the tools. For example, when performing
565 line intersections, the desire may be to consider as identical all sites with
566 matching positions (bcftools isec -c all), or only sites with matching variant
567 type (bcftools isec -c snps -c indels), or only sites with all alleles
568 identical (bcftools isec -c none).
569
570
571 +------------+----------------------------------------------------------------+
572 | Flag value | Result |
573 +============+================================================================+
574 | none | only records with identical REF and ALT alleles are compatible |
575 +------------+----------------------------------------------------------------+
576 | some | only records where some subset of ALT alleles match are |
577 | | compatible |
578 +------------+----------------------------------------------------------------+
579 | all | all records are compatible, regardless of whether the ALT |
580 | | alleles match or not. In the case of records with the same |
581 | | position, only the first will be considered and appear on |
582 | | output. |
583 +------------+----------------------------------------------------------------+
584 | snps | any SNP records are compatible, regardless of whether the ALT |
585 | | alleles match or not. For duplicate positions, only the first |
586 | | SNP record will be considered and appear on output. |
587 +------------+----------------------------------------------------------------+
588 | indels | all indel records are compatible, regardless of whether the |
589 | | REF and ALT alleles match or not. For duplicate positions, |
590 | | only the first indel record will be considered and appear on |
591 | | output. |
592 +------------+----------------------------------------------------------------+
593 | both | abbreviation of "-c indels -c snps" |
594 +------------+----------------------------------------------------------------+
595 | id | only records with identical ID column are compatible. |
596 | | Supported by bcftools merge only. |
597 +------------+----------------------------------------------------------------+
598 </token>
599
600 <token name="@EXPRESSIONS_HELP@">
601 <![CDATA[
602 Expressions
603 -----------
604
605 Valid expressions may contain:
606
607 - numerical constants, string constants
608
609 ::
610
611 1, 1.0, 1e-4
612 "String"
613
614 - arithmetic operators
615
616 ::
617
618 +,*,-,/
619
620 - comparison operators
621
622 ::
623
624 == (same as =), >, >=, <=, <, !=
625
626 - regex operators "~" and its negation "!~"
627
628 ::
629
630 INFO/HAYSTACK ~ "needle"
631
632 - parentheses
633
634 ::
635
636 (, )
637
638 - logical operators
639
640 ::
641
642 && (same as &), ||, |
643
644 - INFO tags, FORMAT tags, column names
645
646 ::
647
648 INFO/DP or DP
649 FORMAT/DV, FMT/DV, or DV
650 FILTER, QUAL, ID, REF, ALT[0]
651
652 - 1 (or 0) to test the presence (or absence) of a flag
653
654 ::
655
656 FlagA=1 && FlagB=0
657
658 - "." to test missing values
659
660 ::
661
662 DP=".", DP!=".", ALT="."
663
664 - missing genotypes can be matched regardless of phase and ploidy (".|.", "./.", ".") using this expression
665
666 ::
667
668 GT="."
669
670 - TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,other)
671
672 ::
673
674 TYPE="indel" | TYPE="snp"
675
676 - array subscripts, "*" for any field
677
678 ::
679
680 (DP4[0]+DP4[1])/(DP4[2]+DP4[3]) > 0.3
681 DP4[*] == 0
682 CSQ[*] ~ "missense_variant.*deleterious"
683
684 - function on FORMAT tags (over samples) and INFO tags (over vector fields)
685
686 ::
687
688 MAX, MIN, AVG, SUM, STRLEN, ABS
689
690 - variables calculated on the fly if not present: number of alternate alleles; number of samples; count of alternate alleles; minor allele count (similar to AC but is always smaller than 0.5); frequency of alternate alleles (AF=AC/AN); frequency of minor alleles (MAF=MAC/AN); number of alleles in called genotypes
691
692 ::
693
694 N_ALT, N_SAMPLES, AC, MAC, AF, MAF, AN
695
696 **Notes:**
697
698 - String comparisons and regular expressions are case-insensitive
699 - If the subscript "*" is used in regular expression search, the whole field
700 is treated as one string. For example, the regex ``STR[*]~"B,C"`` will be
701 true for the string vector INFO/STR=AB,CD.
702 - Variables and function names are case-insensitive, but not tag names. For
703 example, "qual" can be used instead of "QUAL", "strlen()" instead of
704 "STRLEN()" , but not "dp" instead of "DP".
705
706 **Examples:**
707
708 ::
709
710 MIN(DV)>5
711 MIN(DV/DP)>0.3
712 MIN(DP)>10 & MIN(DV)>3
713 FMT/DP>10 & FMT/GQ>10 .. both conditions must be satisfied within one sample
714 FMT/DP>10 && FMT/GQ>10 .. the conditions can be satisfied in different samples
715 QUAL>10 | FMT/GQ>10 .. selects only GQ>10 samples
716 QUAL>10 || FMT/GQ>10 .. selects all samples at QUAL>10 sites
717 TYPE="snp" && QUAL>=10 && (DP4[2]+DP4[3] > 2)
718 MIN(DP)>35 && AVG(GQ)>50
719 ID=@file .. selects lines with ID present in the file
720 ID!=@~/file .. skip lines with ID present in the ~/file
721 MAF[0]<0.05 .. select rare variants at 5% cutoff
722 ]]></token>
723 </macros>