comparison variant_filtration.xml @ 15:01ff8dd37d4d draft default tip

Uploaded
author lz_hust
date Sat, 01 Jun 2019 07:20:41 -0400
parents
children
comparison
equal deleted inserted replaced
14:68426930d59c 15:01ff8dd37d4d
1 <tool id="gatk2_variant_filtration" name="Variant Filtration" version="@VERSION@.0">
2 <description>on VCF files</description>
3 <macros>
4 <import>gatk2_macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <expand macro="version_command" />
8 <command interpreter="python">
## Builds the gatk2_wrapper.py command line that runs the GATK VariantFiltration walker.
## hexlify is imported to hex-encode each filter definition further down.
9 #from binascii import hexlify
10
11 gatk2_wrapper.py
## The log output dataset is handed to the wrapper as its stdout target.
12 --stdout "${output_log}"
## Input VCF: argument template, dataset path, dataset extension and a name.
## NOTE(review): how the wrapper consumes these four values is defined in gatk2_wrapper.py, not visible here.
13 -d "--variant:variant,%(file_type)s" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant"
## Core GATK arguments: jar path, walker name, site-wide options and the VCF output.
14 -p '
15 @JAR_PATH@
16 -T "VariantFiltration"
17 \$GATK2_SITE_OPTIONS
18
19 -o "${output_vcf}"
20
## A cached reference is passed by the path stored in the data table entry.
## NOTE(review): no -R is emitted here for a history reference; presumably the included
## standard_gatk_options handles that case - confirm in gatk2_macros.xml.
## The selector is compared via str() to match the mask selector test below.
21 #if str( $reference_source.reference_source_selector ) != "history":
22 -R "${reference_source.ref_file.fields.path}"
23 #end if
24 '
## One hex-encoded option per filter. is_genotype_filter yields either "filter" or
## "genotypeFilter", giving the filterExpression/filterName or
## genotypeFilterExpression/genotypeFilterName argument pair.
## NOTE(review): hex encoding presumably protects the quoted expression from shell parsing;
## decoding is expected to happen in gatk2_wrapper.py - confirm.
25 #for $variant_filter in $variant_filters:
26 #set $variant_filter = "--%sExpression '%s' --%sName '%s'" % ( str( $variant_filter.is_genotype_filter ), str( $variant_filter.filter_expression ), str( $variant_filter.is_genotype_filter ), str( $variant_filter.filter_name ) )
27 -o '${ hexlify( $variant_filter ) }'
28 #end for
29
## Optional mask ROD: bind the mask file under the user supplied name and pass the
## extension and name options along with it.
30 #if str( $mask_rod_bind_type.mask_rod_bind_type_selector ) == 'set_mask':
31 -d "--mask:${mask_rod_bind_type.mask_rod_name},%(file_type)s" "${mask_rod_bind_type.input_mask_rod}" "${mask_rod_bind_type.input_mask_rod.ext}" "input_mask_${mask_rod_bind_type.mask_rod_name}"
32 -p '
33 --maskExtension "${mask_rod_bind_type.mask_extension}"
34 --maskName "${mask_rod_bind_type.mask_rod_name}"
35 '
36 #end if
37
## Shared GATK options come from a template defined outside this file.
38 #include source=$standard_gatk_options#
39
40 ##start analysis specific options
## SNP clustering arguments are only passed when clustering is selected.
41 #if str( $cluster_snp_type.cluster_snp_type_selector ) == "cluster_snp":
42 -p '
43 --clusterSize "${cluster_snp_type.cluster_size}"
44 --clusterWindowSize "${cluster_snp_type.cluster_window_size}"
45 '
46 #end if
## Expands to the GATK flag when checked and to an empty string otherwise.
47 -p '${missing_values_in_expressions_should_evaluate_as_failing}'
48 </command>
49 <inputs>
<!-- Reference genome source. Both branches declare input_variant and ref_file so the command block can address them as reference_source.input_variant and reference_source.ref_file. -->
50 <conditional name="reference_source">
51 <expand macro="reference_source_selector_param" />
52 <when value="cached">
53 <param name="input_variant" type="data" format="vcf" label="Variant file to filter" help="-V,--variant &amp;lt;variant&amp;gt;" />
54 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
<!-- Offer only the cached indexes whose dbkey column matches the dbkey of the selected input VCF. -->
55 <options from_data_table="gatk2_picard_indexes">
56 <filter type="data_meta" key="dbkey" ref="input_variant" column="dbkey"/>
57 </options>
58 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
59 </param>
60 </when>
<!-- FIXME: flagged by the original author without further detail. NOTE(review): the command block in this file emits no -R argument for this branch; presumably the included standard_gatk_options supplies it. Confirm before removing this marker. -->
61 <when value="history">
62 <param name="input_variant" type="data" format="vcf" label="Variant file to filter" help="-V,--variant &amp;lt;variant&amp;gt;" />
63 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
64 </when>
65 </conditional>
66
<!-- Each repeat entry becomes one expression/name argument pair in the command block. -->
67 <repeat name="variant_filters" title="Variant Filters">
68 <param name="filter_expression" value="AB &lt; 0.2 || MQ0 &gt; 50" type="text" label="Filter expression" help="JEXL formatted expressions (-filter,--filterExpression &amp;lt;filterExpression&amp;gt;)">
<!-- Accept every printable character except the single quote, because the command block wraps the expression in single quotes; no character mapping is applied. -->
69 <sanitizer>
70 <valid initial="string.printable">
71 <remove value="&apos;"/>
72 </valid>
73 <mapping initial="none"/>
74 </sanitizer>
75 </param>
76 <param name="filter_name" value="custom_filter" type="text" label="Filter name" help="-filterName,--filterName &amp;lt;filterName&amp;gt;"/>
<!-- The true/false values are spliced into the argument names by the command block: "filter" or "genotypeFilter" followed by Expression and Name. -->
77 <param name="is_genotype_filter" type="boolean" truevalue="genotypeFilter" falsevalue="filter" label="Use filter at the individual sample level" help="Use -G_filter,--genotypeFilterExpression &amp;lt;genotypeFilterExpression&amp;gt; and -G_filterName,--genotypeFilterName &amp;lt;genotypeFilterName&amp;gt; for filter type" />
78 </repeat>
79
<!-- Optional mask ROD. "Set mask" is the default selection, so a mask file is requested unless the user opts out. -->
80 <conditional name="mask_rod_bind_type">
81 <param name="mask_rod_bind_type_selector" type="select" label="Provide a Mask reference-ordered data file">
82 <option value="set_mask" selected="True">Set mask</option>
83 <option value="exclude_mask">Don't set mask</option>
84 </param>
85 <when value="exclude_mask">
86 <!-- Do nothing here -->
87 </when>
88 <when value="set_mask">
89 <param name="input_mask_rod" type="data" format="bed,gatk_dbsnp,vcf" label="Mask ROD file" help="--mask &amp;lt;mask&amp;gt;" />
90 <param name="mask_rod_name" type="text" value="Mask" label="Mask Name" help="-maskName,--maskName &amp;lt;maskName&amp;gt;"/>
91 <param name="mask_extension" type="integer" value="0" label="Mask Extension" help="-maskExtend,--maskExtension &amp;lt;maskExtension&amp;gt;"/>
92 </when>
93 </conditional>
94
<!-- Shared basic/advanced GATK parameters, defined in the imported macros file. -->
95 <expand macro="gatk_param_type_conditional" />
96
<!-- SNP clustering is off by default; its two parameters are only shown when it is enabled. -->
97 <conditional name="cluster_snp_type">
98 <param name="cluster_snp_type_selector" type="select" label="Cluster SNPs">
99 <option value="cluster_snp">Cluster SNPs</option>
100 <option value="do_not_cluster_snp" selected="True">Do not cluster SNPs</option>
101 </param>
102 <when value="do_not_cluster_snp">
103 <!-- Do nothing here -->
104 </when>
105 <when value="cluster_snp">
106 <param name="cluster_size" type="integer" value="3" label="The number of SNPs which make up a cluster" help="-cluster,--clusterSize &amp;lt;clusterSize&amp;gt;"/>
107 <param name="cluster_window_size" type="integer" value="0" label="The window size (in bases) in which to evaluate clustered SNPs" help="-window,--clusterWindowSize &amp;lt;clusterWindowSize&amp;gt;"/>
108 </when>
109 </conditional>
110
<!-- The true value is the literal GATK flag; the false value is empty so nothing is added to the command line. -->
111 <param name="missing_values_in_expressions_should_evaluate_as_failing" type="boolean" truevalue="--missingValuesInExpressionsShouldEvaluateAsFailing" falsevalue="" label="Should missing values be considered failing the expression" help="--missingValuesInExpressionsShouldEvaluateAsFailing" />
112
113 </inputs>
114 <outputs>
<!-- Filtered variants; the command block passes this dataset to GATK with -o. -->
115 <data name="output_vcf" format="vcf" label="${tool.name} on ${on_string} (Variant File)" />
<!-- Run log; the command block passes this dataset to the wrapper as its stdout target. -->
116 <data name="output_log" format="txt" label="${tool.name} on ${on_string} (log)" />
117 </outputs>
118 <tests>
<!-- Single functional test: reference taken from the history, one filter expression, a BED mask, basic GATK options and no SNP clustering. -->
119 <test>
120 <param name="reference_source_selector" value="history" />
121 <param name="ref_file" value="phiX.fasta" ftype="fasta" />
122 <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
123 <param name="filter_expression" value="MQ &lt; 37.74 || MQ0 &gt; 50" />
124 <param name="filter_name" value="Galaxy_filter" />
<!-- NOTE(review): no value attribute is given for this boolean; presumably the unchecked default is intended - confirm. -->
125 <param name="is_genotype_filter" />
126 <param name="mask_rod_bind_type_selector" value="set_mask" />
127 <param name="input_mask_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" />
128 <param name="mask_rod_name" value="." />
129 <param name="mask_extension" value="0" />
130 <param name="gatk_param_type_selector" value="basic" />
131 <param name="cluster_snp_type_selector" value="do_not_cluster_snp" />
<!-- NOTE(review): no value attribute is given for this boolean either; presumably the flag stays off - confirm. -->
132 <param name="missing_values_in_expressions_should_evaluate_as_failing" />
<!-- NOTE(review): the expected VCF is the same file that is used as input_variant above, compared with up to 4 differing lines. Confirm this is intentional and that a dedicated gatk_variant_filtration fixture was not meant. -->
133 <output name="output_vcf" file="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" lines_diff="4" />
<!-- The log is only checked for containing the reference snippet, not for equality. -->
134 <output name="output_log" file="gatk/gatk_variant_filtration/gatk_variant_filtration_out_1.log.contains" compare="contains" />
135 </test>
136 </tests>
137 <help>
138 **What it does**
139
140 Filters variant calls using a number of user-selectable, parameterizable criteria.
141
142 For more information on using the VariantFiltration module, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_filters_VariantFiltration.html&gt;`_.
143
144 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.
145
146 If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.
147
148 ------
149
150 **Inputs**
151
152 GenomeAnalysisTK: VariantFiltration accepts a VCF input file and, optionally, a mask ROD file (BED, dbSNP, or VCF).
153
154
155 **Outputs**
156
157 The output is in VCF format.
158
159 Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.
160
161 -------
162
163 **Settings**::
164
165
166 filterExpression One or more expressions used with INFO fields to filter (see wiki docs for more info)
167 filterName Names to use for the list of filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered
168 genotypeFilterExpression One or more expressions used with FORMAT (sample/genotype-level) fields to filter (see wiki docs for more info)
169 genotypeFilterName Names to use for the list of sample/genotype filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered
170 clusterSize The number of SNPs which make up a cluster (see also --clusterWindowSize); [default:3]
171 clusterWindowSize The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1); [default:0]
172 maskName The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']
173 missingValuesInExpressionsShouldEvaluateAsFailing When evaluating the JEXL expressions, should missing values be considered failing the expression (by default they are considered passing)?
174
175 @CITATION_SECTION@
176 </help>
177 <expand macro="citations" />
178 </tool>