Mercurial > repos > iuc > bcftools_plugin_split_vep
comparison macros.xml @ 0:3b6cd8086498 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit c45135e52ae5039e09272ac6f504d0ceb574aa70
author | iuc |
---|---|
date | Sat, 23 Jul 2022 13:49:21 +0000 |
parents | |
children | 70276425d001 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:3b6cd8086498 |
---|---|
1 <macros> | |
2 <token name="@TOOL_VERSION@">1.10</token> | |
3 <xml name="bio_tools"> | |
4 <xrefs> | |
5 <xref type="bio.tools">bcftools</xref> | |
6 </xrefs> | |
7 </xml> | |
8 <xml name="requirements"> | |
9 <requirements> | |
10 <requirement type="package" version="@TOOL_VERSION@">bcftools</requirement> | |
11 <requirement type="package" version="1.10">htslib</requirement> | |
12 <yield /> | |
13 </requirements> | |
14 </xml> | |
15 <xml name="samtools_requirement"> | |
16 <requirement type="package" version="1.10">samtools</requirement> | |
17 </xml> | |
18 <xml name="matplotlib_requirement"> | |
19 <requirement type="package" version="3.4.3">matplotlib</requirement> | |
20 </xml> | |
21 <!-- Version probe: stderr is merged into stdout so the 'Version:' line can be grepped whichever stream bcftools writes it to. The ampersand and the redirect are entity-escaped so the macro file stays well-formed XML. --> <xml name="version_command"> | |
22 <version_command>bcftools 2&gt;&amp;1 | grep 'Version:'</version_command> | |
23 </xml> | |
24 | |
25 <xml name="citations"> | |
26 <citations> | |
27 <citation type="doi">10.1093/bioinformatics/btp352</citation> | |
28 <yield /> | |
29 </citations> | |
30 </xml> | |
31 <token name="@BCFTOOLS_WIKI@">https://github.com/samtools/bcftools/wiki</token> | |
32 <token name="@BCFTOOLS_MANPAGE@">http://samtools.github.io/bcftools/bcftools.html</token> | |
33 <token name="@THREADS@"> | |
34 --threads \${GALAXY_SLOTS:-4} | |
35 </token> | |
36 <token name="@PREPARE_ENV@"> | |
37 <![CDATA[ | |
38 export BCFTOOLS_PLUGINS=`which bcftools | sed 's,bin/bcftools,libexec/bcftools,'`; | |
39 ]]> | |
40 </token> | |
41 <xml name="macro_input"> | |
42 <param name="input_file" type="data" format="vcf,vcf_bgzip,bcf" label="VCF/BCF Data" /> | |
43 </xml> | |
44 <token name="@PREPARE_INPUT_FILE@"> | |
45 <![CDATA[ | |
46 ## Stage the input under a fixed local name: plain VCF is bgzip-compressed and indexed; bgzipped VCF and BCF are symlinked, reusing the index recorded in the dataset metadata when present and running bcftools index otherwise. | |
47 #set $input_vcf = 'input.vcf.gz' | |
48 #if $input_file.is_of_type('vcf') | |
49 bgzip -c '$input_file' > $input_vcf && | |
50 bcftools index $input_vcf && | |
51 #elif $input_file.is_of_type('vcf_bgzip') | |
52 ln -s '$input_file' $input_vcf && | |
53 #if $input_file.metadata.tabix_index: | |
54 ln -s '${input_file.metadata.tabix_index}' ${input_vcf}.tbi && | |
55 #else | |
56 bcftools index $input_vcf && | |
57 #end if | |
58 #elif $input_file.is_of_type('bcf') | |
59 #set $input_vcf = 'input.bcf' | |
60 ln -s '$input_file' $input_vcf && | |
61 #if $input_file.metadata.bcf_index: | |
62 ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi && | |
63 #else | |
64 bcftools index $input_vcf && | |
65 #end if | |
66 #end if | |
67 ]]> | |
68 </token> | |
69 <token name="@INPUT_FILE@"> | |
70 $input_vcf | |
71 </token> | |
72 | |
73 <xml name="macro_inputs"> | |
74 <param name="input_files" type="data" format="vcf,vcf_bgzip,bcf" label="Other VCF/BCF Datasets" multiple="True" /> | |
75 </xml> | |
76 <token name="@PREPARE_INPUT_FILES@"> | |
77 <![CDATA[ | |
78 ## For each dataset in input_files: stage it under a numbered local name (plain VCF is bgzip-compressed and indexed; bgzipped VCF and BCF are symlinked, reusing the metadata index when present and running bcftools index otherwise), append that name to the list file and collect it in input_vcfs. | |
79 #set $input_vcfs = [] | |
80 #set $vcfs_list_file = 'vcfs_list' | |
81 #for (i, input_file) in enumerate($input_files): | |
82 #set $input_vcf = 'input' + str($i) + '.vcf.gz' | |
83 #if $input_file.is_of_type('vcf') | |
84 bgzip -c '$input_file' > $input_vcf && | |
85 bcftools index $input_vcf && | |
86 #elif $input_file.is_of_type('vcf_bgzip') | |
87 ln -s '$input_file' $input_vcf && | |
88 #if $input_file.metadata.tabix_index: | |
89 ln -s '${input_file.metadata.tabix_index}' ${input_vcf}.tbi && | |
90 #else | |
91 bcftools index $input_vcf && | |
92 #end if | |
93 #elif $input_file.is_of_type('bcf') | |
94 #set $input_vcf = 'input' + str($i) + '.bcf.gz' | |
95 ln -s '$input_file' $input_vcf && | |
96 #if $input_file.metadata.bcf_index: | |
97 ln -s '${input_file.metadata.bcf_index}' ${input_vcf}.csi && | |
98 #else | |
99 bcftools index $input_vcf && | |
100 #end if | |
101 #end if | |
102 echo '$input_vcf' >> $vcfs_list_file && | |
103 $input_vcfs.append($input_vcf) | |
104 #end for | |
105 ]]> | |
106 </token> | |
107 <token name="@INPUT_FILES@"> | |
108 #echo ' '.join($input_vcfs)# | |
109 </token> | |
110 <token name="@INPUT_LIST_FILE@"> | |
111 $vcfs_list_file | |
112 </token> | |
113 | |
114 <xml name="test_using_reference" token_select_from="history" token_ref=""> | |
115 <conditional name="reference_source"> | |
116 <param name="reference_source_selector" value="@SELECT_FROM@" /> | |
117 <param name="fasta_ref" ftype="fasta" value="@REF@" /> | |
118 </conditional> | |
119 </xml> | |
120 | |
121 <xml name="macro_fasta_ref"> | |
122 <conditional name="reference_source"> | |
123 <param name="reference_source_selector" type="select" label="Choose the source for the reference genome"> | |
124 <option value="cached">Use a built-in genome</option> | |
125 <option value="history">Use a genome from the history</option> | |
126 </param> | |
127 <when value="cached"> | |
128 <param name="fasta_ref" type="select" label="Reference genome"> | |
129 <options from_data_table="fasta_indexes"> | |
130 <filter type="data_meta" column="dbkey" key="dbkey" ref="input_file" /> | |
131 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" /> | |
132 </options> | |
133 </param> | |
134 </when> | |
135 <when value="history"> | |
136 <param name="fasta_ref" type="data" format="fasta" label="Reference genome" /> | |
137 </when> | |
138 </conditional> | |
139 </xml> | |
140 <token name="@PREPARE_FASTA_REF@"> | |
141 <![CDATA[ | |
142 #set $input_fa_ref = None | |
143 #if 'fasta_ref' in $section and $section.fasta_ref: | |
144 #if 'reference_source_selector' in $section: | |
145 #if str($section.reference_source_selector) == "history": | |
146 #set $input_fa_ref = 'ref.fa' | |
147 ln -s '$section.fasta_ref' $input_fa_ref && | |
148 samtools faidx $input_fa_ref && | |
149 #else: | |
150 #set $input_fa_ref = str($section.fasta_ref.fields.path) | |
151 #end if | |
152 #end if | |
153 #end if | |
154 ]]> | |
155 </token> | |
156 <token name="@FASTA_REF@"> | |
157 #if $input_fa_ref is not None: | |
158 --fasta-ref $input_fa_ref | |
159 #elif 'fasta_ref' in $section and $section.fasta_ref: | |
160 --fasta-ref '${section.fasta_ref}' | |
161 #end if | |
162 </token> | |
163 | |
164 <xml name="macro_AF_file"> | |
165 <param name="AF_file" argument="--AF-file" type="data" format="tabular" optional="true" label="Allele frequencies file" help="Tab-delimited file containing the columns CHR,POS,REF,ALT,AF" /> | |
166 </xml> | |
167 <!-- This may need to bgzip and tabix the file --> | |
168 <token name="@PREPARE_AF_FILE@"> | |
169 <![CDATA[ | |
170 #if 'AF_file' in $section and $section.AF_file: | |
171 #pass | |
172 #end if | |
173 ]]> | |
174 </token> | |
175 <token name="@AF_FILE@"> | |
176 #if 'AF_file' in $section and $section.AF_file: | |
177 --AF-file '${section.AF_file}' | |
178 #end if | |
179 </token> | |
180 | |
181 <!-- Optional dataset parameter for the estimate-AF option. Quotes and angle brackets inside the help attribute are entity-escaped so the attribute is not terminated early. --> <xml name="macro_estimate_AF"> | |
182 <param name="estimate_AF" argument="--estimate-AF" type="data" format="data" optional="true" label="Estimate allele frequency" help="Calculate AC,AN counts on the fly, using either all samples (&quot;-&quot;) or samples listed in &lt;file&gt;" /> | |
183 </xml> | |
184 <token name="@ESTIMATE_AF@"> | |
185 #if 'estimate_AF' in $section and $section.estimate_AF: | |
186 --estimate-AF "${section.estimate_AF}" | |
187 #end if | |
188 </token> | |
189 | |
190 <xml name="macro_exons_file"> | |
191 <param name="exons_file" type="data" format="tabular" optional="true" label="Exons file" help="Tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed)" /> | |
192 </xml> | |
193 <token name="@PREPARE_EXONS_FILE@"> | |
194 <![CDATA[ | |
195 #set $exons_path = None | |
196 #if 'exons_file' in $section and $section.exons_file: | |
197 #set $exons_path = 'exons_file.tab.gz' | |
198 bgzip -c "$section.exons_file" > $exons_path && | |
199 tabix -s 1 -b 2 -e 3 $exons_path && | |
200 #end if | |
201 ]]> | |
202 </token> | |
203 <token name="@EXONS_FILE@"> | |
204 #if 'exons_file' in $section and $section.exons_file: | |
205 --exons $exons_path | |
206 #end if | |
207 </token> | |
208 | |
209 <xml name="macro_ploidy_file"> | |
210 <param name="ploidy_file" type="data" format="tabular" optional="true" label="Ploidy file" help="Tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY" /> | |
211 </xml> | |
212 <token name="@PLOIDY_FILE@"> | |
213 #if 'ploidy_file' in $section and $section.ploidy_file: | |
214 --ploidy "${section.ploidy_file}" | |
215 #end if | |
216 </token> | |
217 | |
218 <xml name="macro_collapse_opt_none"> | |
219 <option value="none">none - require the exact same set of alleles in all files</option> | |
220 </xml> | |
221 <xml name="macro_collapse_opt_id"> | |
222 <option value="id">id - only records with identical ID column are compatible. </option> | |
223 </xml> | |
224 <xml name="macro_collapse"> | |
225 <param name="collapse" type="select" optional="true" label="Collapse" help="Controls how to treat records with duplicate positions and defines compatible records across multiple input files"> | |
226 <option value="snps">snps - allow different alleles, as long as they all are SNPs</option> | |
227 <option value="indels">indels - allow different alleles, as long as they all are indels</option> | |
228 <option value="both">both - indels and snps </option> | |
229 <option value="some">some - at least some of the ALTs must match</option> | |
230 <option value="any">any - any combination of alleles</option> | |
231 <yield/> | |
232 </param> | |
233 </xml> | |
234 <token name="@COLLAPSE@"> | |
235 #if $section.collapse: | |
236 --collapse ${section.collapse} | |
237 #end if | |
238 </token> | |
239 | |
240 <!-- Optional text parameter holding a comma-separated list of FILTER terms; the regex accepts an empty value or terms free of whitespace and commas. Quotes inside the help attribute are entity-escaped. --> <xml name="macro_apply_filters"> | |
241 <param argument="--apply_filters" type="text" value="" optional="true" label="Apply filters" | |
242 help="Skip sites where FILTER column does not contain any of the strings listed (e.g. &quot;PASS,.&quot;)"> | |
243 <validator type="regex" message="FILTER terms separated by commas">^([^ \t\n\r\f\v,]+(,[^ \t\n\r\f\v,]+)*)?$</validator> | |
244 </param> | |
245 </xml> | |
246 <token name="@APPLY_FILTERS@"> | |
247 #if $section.apply_filters: | |
248 --apply-filters '${section.apply_filters}' | |
249 #end if | |
250 </token> | |
251 | |
252 <xml name="macro_select_output_type"> | |
253 <param name="output_type" type="select"> | |
254 <option value="b">compressed BCF</option> | |
255 <!-- no galaxy datatypes for these | |
256 <option value="u">uncompressed BCF</option> | |
257 <option value="z">compressed VCF</option> | |
258 --> | |
259 <option value="v">uncompressed VCF</option> | |
260 </param> | |
261 </xml> | |
262 <token name="@OUTPUT_TYPE@"> | |
263 #if str($output_type) != "__none__": | |
264 --output-type '${output_type}' | |
265 #end if | |
266 </token> | |
267 | |
268 <xml name="macro_vcf_output"> | |
269 <data name="output_file" format="vcf"> | |
270 <change_format> | |
271 <when input="output_type" value="b" format="bcf" /> | |
272 <when input="output_type" value="u" format="bcf" /> | |
273 <when input="output_type" value="z" format="vcf_bgzip" /> | |
274 <when input="output_type" value="v" format="vcf" /> | |
275 </change_format> | |
276 </data> | |
277 </xml> | |
278 | |
279 <xml name="macro_invert_targets"> | |
280 <param name="invert_targets_file" type="boolean" truevalue="^" falsevalue="" | |
281 label="Invert Targets" | |
282 help="inverts the query/filtering applied by the targets" /> | |
283 </xml> | |
284 | |
285 <xml name="macro_restriction_spec" token_type="region" token_label_type="Region"> | |
286 <repeat name="@TYPE@s" title="@LABEL_TYPE@ Filter" default="1" min="1"> | |
287 <param name="chrom" type="text" label="@LABEL_TYPE@ chromosome"> | |
288 <validator type="expression" message="A chromosome identifier is required when specifying a @LABEL_TYPE@ filter">value.strip()</validator> | |
289 </param> | |
290 <param name="start" type="text" label="@LABEL_TYPE@ start position"> | |
291 <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator> | |
292 </param> | |
293 <param name="stop" type="text" label="@LABEL_TYPE@ end position"> | |
294 <validator type="expression" message="an integer number is required">not value or value.isdigit()</validator> | |
295 </param> | |
296 <yield /> | |
297 </repeat> | |
298 </xml> | |
299 | |
300 <xml name="macro_restrictions_file" token_type="region" token_label_type="Region"> | |
301 <param name="@TYPE@s_file" type="data" format="tabular" label="@LABEL_TYPE@s File" help="restrict to @LABEL_TYPE@s listed in a file" /> | |
302 </xml> | |
303 | |
304 <xml name="macro_restrict" token_type="region" token_label_type="Region" > | |
305 <conditional name="@TYPE@s"> | |
306 <param name="@TYPE@s_src" type="select" label="@LABEL_TYPE@s"> | |
307 <option value="__none__">Do not restrict to @LABEL_TYPE@s</option> | |
308 <option value="@TYPE@s">Specify one or more @LABEL_TYPE@(s) directly</option> | |
309 <option value="@TYPE@s_file">Operate on @LABEL_TYPE@s specified in a history dataset</option> | |
310 </param> | |
311 <when value="__none__"/> | |
312 <when value="@TYPE@s"> | |
313 <expand macro="macro_restriction_spec" type="@TYPE@" label_type="@LABEL_TYPE@" /> | |
314 <yield /> | |
315 </when> | |
316 <when value="@TYPE@s_file"> | |
317 <expand macro="macro_restrictions_file" type="@TYPE@" label_type="@LABEL_TYPE@" /> | |
318 <yield /> | |
319 </when> | |
320 </conditional> | |
321 </xml> | |
322 | |
323 <!-- Expects the including template to have set intervals to a list of entries with chrom, start and stop fields. Builds one component per entry: "chrom:start-stop" when start or stop is non-empty (an empty start becomes 0), otherwise just the chromosome name. The components are joined with commas into intervals_spec. --> <token name="@PARSE_INTERVALS@"> | |
324 <![CDATA[ | |
325 #set $components = [] | |
326 #for $i in $intervals: | |
327 #set $chrom = str($i.chrom).strip() | |
328 #set $start = str($i.start).strip() | |
329 #set $stop = str($i.stop).strip() | |
330 #if $start or $stop: | |
331 $components.append($chrom + ':' + ($start or '0') + '-' + $stop) | |
332 #else: | |
333 $components.append($chrom) | |
334 #end if | |
335 #end for | |
336 #set $intervals_spec = ','.join($components) | |
337 ]]> | |
338 </token> | |
339 | |
340 <token name="@REGIONS@"> | |
341 <![CDATA[ | |
342 #if $section.regions.regions_src == 'regions': | |
343 #set $intervals = $section.regions.regions | |
344 @PARSE_INTERVALS@ | |
345 --regions '$intervals_spec' | |
346 #elif $section.regions.regions_src == 'regions_file' and $section.regions.regions_file: | |
347 #if $regions_path is not None: | |
348 --regions-file '$regions_path' | |
349 #else: | |
350 --regions-file '$section.regions.regions_file' | |
351 #end if | |
352 #end if | |
353 ]]> | |
354 </token> | |
355 | |
356 <token name="@TARGETS@"> | |
357 <![CDATA[ | |
358 #if $targets_path: | |
359 --targets-file "${section.targets.invert_targets_file}${targets_path}" | |
360 #elif $section.targets.targets_src == 'targets': | |
361 #set $intervals = $section.targets.targets | |
362 @PARSE_INTERVALS@ | |
363 --targets '${section.targets.invert_targets_file}$intervals_spec' | |
364 #elif $section.targets.targets_src == 'targets_file' and $section.targets.targets_file: | |
365 --targets-file "${section.targets.invert_targets_file}${section.targets.targets_file}" | |
366 #end if | |
367 ]]> | |
368 </token> | |
369 | |
370 <token name="@PREPARE_REGIONS_FILE@"> | |
371 <![CDATA[ | |
372 #set $regions_path = None | |
373 #if 'regions' in $section | |
374 #if $section.regions.regions_src == 'regions_file' and $section.regions.regions_file: | |
375 #if $section.regions.regions_file.ext.startswith('bed'): | |
376 #set $regions_path = 'regions_file.bed' | |
377 ln -s '$section.regions.regions_file' $regions_path && | |
378 #end if | |
379 #end if | |
380 #end if | |
381 ]]> | |
382 </token> | |
383 | |
384 <!-- Writes a bgzip-compressed, tabix-indexed copy of the selected targets file to targets_file.tab.gz and stores that name in targets_path (left as None when no targets file applies). Two section layouts are handled: a nested "targets" conditional, or a targets_file parameter placed directly on the section. The flat-layout branch tests the same section object its bgzip line reads from, and checks membership first so sections without a targets_file parameter fall through cleanly. NOTE(review): the previous test used a tgts_sec variable that this file never defines; confirm no including tool relied on setting it. --> <token name="@PREPARE_TARGETS_FILE@"> | |
385 <![CDATA[ | |
386 #set $targets_path = None | |
387 #if 'targets' in $section | |
388 #if $section.targets.targets_src == 'targets_file': | |
389 #set $targets_path = 'targets_file.tab.gz' | |
390 bgzip -c "$section.targets.targets_file" > $targets_path && | |
391 tabix -s 1 -b 2 -e 2 $targets_path && | |
392 #end if | |
393 #elif 'targets_file' in $section and $section.targets_file: | |
394 #set $targets_path = 'targets_file.tab.gz' | |
395 bgzip -c "$section.targets_file" > $targets_path && | |
396 tabix -s 1 -b 2 -e 2 $targets_path && | |
397 #end if | |
398 ]]> | |
399 </token> | |
400 | |
401 <token name="@TARGETS_FILE@"> | |
402 <![CDATA[ | |
403 #if $targets_path is not None: | |
404 --targets-file "${section.invert_targets_file}${targets_path}" | |
405 #elif $section.targets_file: | |
406 --targets-file "${section.invert_targets_file}${section.targets_file}" | |
407 #end if | |
408 ]]> | |
409 </token> | |
410 | |
411 <!-- Sample selection parameters: a comma-separated sample list and a samples file, each with its own boolean whose true value is the "^" prefix used to invert the selection. The list validator now reports a meaningful message, and quotes inside help attributes are entity-escaped. --> <xml name="macro_samples"> | |
412 <param argument="--samples" type="text" value="" optional="true" label="Samples" | |
413 help="Comma separated list of samples to annotate (or exclude)"> | |
414 <validator type="regex" message="Sample names (word characters only) separated by commas">^(\w+(,\w+)*)?$</validator> | |
415 </param> | |
416 <param name="invert_samples" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples" | |
417 help="Inverts the query/filtering applied by Samples (adds &quot;^&quot; prefix to exclude)" /> | |
418 <param argument="--samples_file" type="data" format="tabular" optional="true" label="Samples file" | |
419 help="File of samples to include" /> | |
420 <param name="invert_samples_file" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples file" | |
421 help="inverts the query/filtering applied by Samples file" /> | |
422 </xml> | |
423 <token name="@SAMPLES@"> | |
424 #set $samples_defined = False | |
425 #if str($section.samples) != '': | |
426 #set $samples_defined = True | |
427 --samples '${section.invert_samples}${section.samples}' | |
428 #end if | |
429 #if $section.samples_file: | |
430 #set $samples_defined = True | |
431 --samples-file "${section.invert_samples_file}${section.samples_file}" | |
432 #end if | |
433 </token> | |
434 | |
435 <xml name="macro_sample"> | |
436 <param name="sample" type="text" optional="true" label="Sample" help="Apply variants of the given sample" /> | |
437 </xml> | |
438 <token name="@SAMPLE@"> | |
439 #if $section.sample: | |
440 --sample '${section.sample}' | |
441 #end if | |
442 </token> | |
443 | |
444 <!-- Shared validator and sanitizer for the include/exclude expression parameters, whose values are wrapped in single quotes on the command line. An empty value passes; otherwise a single quote anywhere or a trailing backslash is rejected. endswith is used instead of indexing the last character so an empty value cannot raise IndexError. The sanitizer allows letters, digits, whitespace and punctuation except "@" and the single quote. --> <xml name="macro_include_exclude_validate_sanitize"> | |
445 <validator type="expression" message="Single quote or trailing backslash not allowed">not value or ("'" not in value and not value.endswith("\\"))</validator> | |
446 <sanitizer> | |
447 <valid initial="string.ascii_letters,string.digits,string.whitespace,string.punctuation"> | |
448 <remove value="@" /> | |
449 <remove value="'" /> | |
450 </valid> | |
451 </sanitizer> | |
452 </xml> | |
453 | |
454 <xml name="macro_include"> | |
455 <param argument="--include" type="text" optional="true" label="Include" help="Select sites for which the expression is true"> | |
456 <expand macro="macro_include_exclude_validate_sanitize" /> | |
457 </param> | |
458 </xml> | |
459 <token name="@INCLUDE@"> | |
460 #if $section.include: | |
461 --include '${section.include}' | |
462 #end if | |
463 </token> | |
464 | |
465 <xml name="macro_exclude"> | |
466 <param argument="--exclude" type="text" optional="true" label="Exclude" help="Exclude sites for which the expression is true"> | |
467 <expand macro="macro_include_exclude_validate_sanitize" /> | |
468 </param> | |
469 </xml> | |
470 <token name="@EXCLUDE@"> | |
471 #if $section.exclude: | |
472 --exclude '${section.exclude}' | |
473 #end if | |
474 </token> | |
475 | |
476 <xml name="macro_columns"> | |
477 <param name="columns" type="text" value="" optional="true" label="Columns" | |
478 help="List of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details"> | |
479 <validator type="regex" message="COLUMN names separated by commas">^([^,]+(,[^,]+)*)?$</validator> | |
480 </param> | |
481 </xml> | |
482 <!-- Emits the columns option only when the parameter has a non-empty value. A truthiness test is used, as in the include/exclude/sample tokens, so an unset (None) value is skipped instead of being compared against the empty string and passed through. --> <token name="@COLUMNS@"> | |
483 #if $section.columns: | |
484 --columns '${section.columns}' | |
485 #end if | |
486 </token> | |
487 | |
488 <xml name="macro_haploid2diploid"> | |
489 <param name="haploid2diploid" type="boolean" truevalue="--haploid2diploid" falsevalue="" label="Haploid2Diploid" help="convert haploid genotypes to diploid homozygotes" /> | |
490 </xml> | |
491 | |
492 <xml name="macro_vcf_ids"> | |
493 <param name="vcf_ids" type="boolean" truevalue="--vcf-ids" falsevalue="" label="Vcf Ids" help="output VCF IDs instead of CHROM:POS_REF_ALT" /> | |
494 </xml> | |
495 <token name="@VCF_IDS@"> | |
496 ${section.vcf_ids} | |
497 </token> | |
498 | |
499 <token name="@OUTPUT_HELP@"> | |
500 <![CDATA[ | |
501 Output Type | |
502 ----------- | |
503 | |
504 Output compressed BCF (b), or uncompressed VCF (v). | |
505 Use the BCF option when piping between bcftools subcommands to speed up | |
506 performance by removing unnecessary compression/decompression | |
507 and VCF<->BCF conversion. | |
508 | |
509 This Galaxy tool recommends using the compressed BCF format | |
510 as piping is not implemented, and uncompressed data would | |
511 use unnecessary amounts of space. | |
512 ]]></token> | |
513 <token name="@REGIONS_HELP@"> | |
514 <![CDATA[ | |
515 Region Selections | |
516 ----------------- | |
517 | |
518 Regions can be specified in a VCF, | |
519 BED, or tab-delimited file (the default). The columns of the | |
520 tab-delimited file are: CHROM, POS, and, optionally, POS_TO, | |
521 where positions are 1-based and inclusive. Uncompressed | |
522 files are stored in memory, while bgzip-compressed and | |
523 tabix-indexed region files are streamed. Note that sequence | |
524 names must match exactly, "chr20" is not the same as "20". | |
525 Also note that chromosome ordering in FILE will be | |
526 respected, the VCF will be processed in the order in which | |
527 chromosomes first appear in FILE. However, within | |
528 chromosomes, the VCF will always be processed in ascending | |
529 genomic coordinate order no matter what order they appear in | |
530 FILE. Note that overlapping regions in FILE can result in | |
531 duplicated out of order positions in the output. This option | |
532 requires indexed VCF/BCF files. | |
533 ]]></token> | |
534 <token name="@TARGETS_HELP@"><![CDATA[ | |
535 Targets | |
536 ------- | |
537 | |
538 Similar to regions, but the next position is accessed by streaming the whole | |
539 VCF/BCF rather than using the tbi/csi index. Both regions and targets options can be | |
540 applied simultaneously: regions uses the index to jump to a region and targets discards | |
541 positions which are not in the targets. Unlike regions, targets can be prefixed with | |
542 "^" to request logical complement. For example, "^X,Y,MT" indicates that | |
543 sequences X, Y and MT should be skipped. Yet another difference between the two | |
544 is that regions checks both start and end positions of indels, whereas targets checks | |
545 start positions only. | |
546 | |
547 For the bcftools call command, with the option -C alleles, third column of the | |
548 targets file must be comma-separated list of alleles, starting with the | |
549 reference allele. Note that the file must be compressed and indexed. Such a file | |
550 can be easily created from a VCF using:: | |
551 | |
552 bcftools query -f'%CHROM\t%POS\t%REF,%ALT\n' file.vcf | bgzip -c > als.tsv.gz && tabix -s1 -b2 -e2 als.tsv.gz | |
553 ]]> | |
554 <!-- TODO: galaxy-ify --> | |
555 </token> | |
556 | |
557 | |
558 <token name="@COLLAPSE_HELP@"> | |
559 Collapse | |
560 -------- | |
561 | |
562 Controls how to treat records with duplicate positions and defines compatible | |
563 records across multiple input files. Here by "compatible" we mean records which | |
564 should be considered as identical by the tools. For example, when performing | |
565 line intersections, the desire may be to consider as identical all sites with | |
566 matching positions (bcftools isec -c all), or only sites with matching variant | |
567 type (bcftools isec -c snps -c indels), or only sites with all alleles | |
568 identical (bcftools isec -c none). | |
569 | |
570 | |
571 +------------+----------------------------------------------------------------+ | |
572 | Flag value | Result | | |
573 +============+================================================================+ | |
574 | none | only records with identical REF and ALT alleles are compatible | | |
575 +------------+----------------------------------------------------------------+ | |
576 | some | only records where some subset of ALT alleles match are | | |
577 | | compatible | | |
578 +------------+----------------------------------------------------------------+ | |
579 | all | all records are compatible, regardless of whether the ALT | | |
580 | | alleles match or not. In the case of records with the same | | |
581 | | position, only the first will be considered and appear on | |
582 | | output. | | |
583 +------------+----------------------------------------------------------------+ | |
584 | snps | any SNP records are compatible, regardless of whether the ALT | | |
585 | | alleles match or not. For duplicate positions, only the first | | |
586 | | SNP record will be considered and appear on output. | | |
587 +------------+----------------------------------------------------------------+ | |
588 | indels | all indel records are compatible, regardless of whether the | | |
589 | | REF and ALT alleles match or not. For duplicate positions, | | |
590 | | only the first indel record will be considered and appear on | | |
591 | | output. | | |
592 +------------+----------------------------------------------------------------+ | |
593 | both | abbreviation of "-c indels -c snps" | | |
594 +------------+----------------------------------------------------------------+ | |
595 | id | only records with identical ID column are compatible. | | |
596 | | Supported by bcftools merge only. | |
597 +------------+----------------------------------------------------------------+ | |
598 </token> | |
599 | |
600 <token name="@EXPRESSIONS_HELP@"> | |
601 <![CDATA[ | |
602 Expressions | |
603 ----------- | |
604 | |
605 Valid expressions may contain: | |
606 | |
607 - numerical constants, string constants | |
608 | |
609 :: | |
610 | |
611 1, 1.0, 1e-4 | |
612 "String" | |
613 | |
614 - arithmetic operators | |
615 | |
616 :: | |
617 | |
618 +,*,-,/ | |
619 | |
620 - comparison operators | |
621 | |
622 :: | |
623 | |
624 == (same as =), >, >=, <=, <, != | |
625 | |
626 - regex operators "~" and its negation "!~" | |
627 | |
628 :: | |
629 | |
630 INFO/HAYSTACK ~ "needle" | |
631 | |
632 - parentheses | |
633 | |
634 :: | |
635 | |
636 (, ) | |
637 | |
638 - logical operators | |
639 | |
640 :: | |
641 | |
642 && (same as &), ||, | | |
643 | |
644 - INFO tags, FORMAT tags, column names | |
645 | |
646 :: | |
647 | |
648 INFO/DP or DP | |
649 FORMAT/DV, FMT/DV, or DV | |
650 FILTER, QUAL, ID, REF, ALT[0] | |
651 | |
652 - 1 (or 0) to test the presence (or absence) of a flag | |
653 | |
654 :: | |
655 | |
656 FlagA=1 && FlagB=0 | |
657 | |
658 - "." to test missing values | |
659 | |
660 :: | |
661 | |
662 DP=".", DP!=".", ALT="." | |
663 | |
664 - missing genotypes can be matched regardless of phase and ploidy (".|.", "./.", ".") using this expression | |
665 | |
666 :: | |
667 | |
668 GT="." | |
669 | |
670 - TYPE for variant type in REF,ALT columns (indel,snp,mnp,ref,other) | |
671 | |
672 :: | |
673 | |
674 TYPE="indel" | TYPE="snp" | |
675 | |
676 - array subscripts, "*" for any field | |
677 | |
678 :: | |
679 | |
680 (DP4[0]+DP4[1])/(DP4[2]+DP4[3]) > 0.3 | |
681 DP4[*] == 0 | |
682 CSQ[*] ~ "missense_variant.*deleterious" | |
683 | |
684 - function on FORMAT tags (over samples) and INFO tags (over vector fields) | |
685 | |
686 :: | |
687 | |
688 MAX, MIN, AVG, SUM, STRLEN, ABS | |
689 | |
690 - variables calculated on the fly if not present: number of alternate alleles; number of samples; count of alternate alleles; minor allele count (similar to AC but is always smaller than 0.5); frequency of alternate alleles (AF=AC/AN); frequency of minor alleles (MAF=MAC/AN); number of alleles in called genotypes | |
691 | |
692 :: | |
693 | |
694 N_ALT, N_SAMPLES, AC, MAC, AF, MAF, AN | |
695 | |
696 **Notes:** | |
697 | |
698 - String comparisons and regular expressions are case-insensitive | |
699 - If the subscript "*" is used in regular expression search, the whole field | |
700 is treated as one string. For example, the regex ``STR[*]~"B,C"`` will be | |
701 true for the string vector INFO/STR=AB,CD. | |
702 - Variables and function names are case-insensitive, but not tag names. For | |
703 example, "qual" can be used instead of "QUAL", "strlen()" instead of | |
704 "STRLEN()" , but not "dp" instead of "DP". | |
705 | |
706 **Examples:** | |
707 | |
708 :: | |
709 | |
710 MIN(DV)>5 | |
711 MIN(DV/DP)>0.3 | |
712 MIN(DP)>10 & MIN(DV)>3 | |
713 FMT/DP>10 & FMT/GQ>10 .. both conditions must be satisfied within one sample | |
714 FMT/DP>10 && FMT/GQ>10 .. the conditions can be satisfied in different samples | |
715 QUAL>10 | FMT/GQ>10 .. selects only GQ>10 samples | |
716 QUAL>10 || FMT/GQ>10 .. selects all samples at QUAL>10 sites | |
717 TYPE="snp" && QUAL>=10 && (DP4[2]+DP4[3] > 2) | |
718 MIN(DP)>35 && AVG(GQ)>50 | |
719 ID=@file .. selects lines with ID present in the file | |
720 ID!=@~/file .. skip lines with ID present in the ~/file | |
721 MAF[0]<0.05 .. select rare variants at 5% cutoff | |
722 ]]></token> | |
723 </macros> |