comparison variant_filtration.xml @ 0:340633249b3d draft

Uploaded
author bgruening
date Mon, 02 Dec 2013 06:18:36 -0500
parents
children 8bcc13094767
comparison
equal deleted inserted replaced
-1:000000000000 0:340633249b3d
1 <tool id="gatk2_variant_filtration" name="Variant Filtration" version="0.0.7">
<!-- Shared GATK2 macro definitions are imported from gatk2_macros.xml. The
     "requirements" macro expanded below is not defined in this file
     (presumably it comes from that import; confirm). -->
2 <description>on VCF files</description>
3 <expand macro="requirements" />
4 <macros>
5 <import>gatk2_macros.xml</import>
6 </macros>
<!--
  Cheetah template for the command line; it is run with the python interpreter
  and invokes gatk2_wrapper.py.
  * The stdout option points the wrapper at the output_log dataset.
  * Each -d group passes four values: an option template containing a
    %(file_type)s placeholder, a dataset, that dataset's extension, and a name
    (used for the input VCF and, optionally, the mask ROD).
  * Each -p group passes a quoted fragment of GATK options: the
    VariantFiltration walker, the output VCF, and -R with the path taken from
    the selected data table entry when the reference selector is not "history".
  * Every entry of the variant_filters repeat is formatted into an
    Expression/Name option pair whose prefix is the value of
    is_genotype_filter ("genotypeFilter" or "filter"); the resulting string is
    hex-encoded with binascii.hexlify and passed through -o.
  * Mask options are emitted only when the mask selector is "set_mask";
    cluster options only when the cluster selector is "cluster_snp".
  * The final -p passes the truevalue or falsevalue of the
    missing_values_in_expressions_should_evaluate_as_failing boolean.
  * @JAR_PATH@, @THREADS@ and $standard_gatk_options are not defined in this
    file (presumably supplied by gatk2_macros.xml; confirm).
  NOTE(review): the mask selector is compared via str() while the reference
  and cluster selectors are compared directly; confirm both forms behave the
  same on the targeted Galaxy release.
-->
7 <command interpreter="python">
8 #from binascii import hexlify
9
10 gatk2_wrapper.py
11 --stdout "${output_log}"
12 -d "--variant:variant,%(file_type)s" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant"
13 -p '
14 @JAR_PATH@
15 -T "VariantFiltration"
16 \$GATK2_SITE_OPTIONS
17
18 @THREADS@
19
20 -o "${output_vcf}"
21
22 #if $reference_source.reference_source_selector != "history":
23 -R "${reference_source.ref_file.fields.path}"
24 #end if
25 '
26 #for $variant_filter in $variant_filters:
27 #set $variant_filter = "--%sExpression '%s' --%sName '%s'" % ( str( $variant_filter.is_genotype_filter ), str( $variant_filter.filter_expression ), str( $variant_filter.is_genotype_filter ), str( $variant_filter.filter_name ) )
28 -o '${ hexlify( $variant_filter ) }'
29 #end for
30
31 #if str( $mask_rod_bind_type.mask_rod_bind_type_selector ) == 'set_mask':
32 -d "--mask:${mask_rod_bind_type.mask_rod_name},%(file_type)s" "${mask_rod_bind_type.input_mask_rod}" "${mask_rod_bind_type.input_mask_rod.ext}" "input_mask_${mask_rod_bind_type.mask_rod_name}"
33 -p '
34 --maskExtension "${mask_rod_bind_type.mask_extension}"
35 --maskName "${mask_rod_bind_type.mask_rod_name}"
36 '
37 #end if
38
39 #include source=$standard_gatk_options#
40
41 ##start analysis specific options
42 #if $cluster_snp_type.cluster_snp_type_selector == "cluster_snp":
43 -p '
44 --clusterSize "${cluster_snp_type.cluster_size}"
45 --clusterWindowSize "${cluster_snp_type.cluster_window_size}"
46 '
47 #end if
48 -p '${missing_values_in_expressions_should_evaluate_as_failing}'
49 </command>
50 <inputs>
<!-- Reference selection. The selector parameter comes from the
     reference_source_selector_param macro, which is not defined in this file.
     Both branches declare the same two parameter names (input_variant and
     ref_file), so the command template addresses them the same way in either
     case. "cached" offers ref_file entries from the gatk2_picard_indexes data
     table filtered by the dbkey of input_variant; "history" takes a FASTA
     dataset. The input label says "filter" because this tool filters the
     supplied VCF rather than annotating it. -->
51 <conditional name="reference_source">
52 <expand macro="reference_source_selector_param" />
53 <when value="cached">
54 <param name="input_variant" type="data" format="vcf" label="Variant file to filter" help="-V,--variant &amp;lt;variant&amp;gt;" />
55 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
56 <options from_data_table="gatk2_picard_indexes">
57 <filter type="data_meta" key="dbkey" ref="input_variant" column="dbkey"/>
58 </options>
59 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
60 </param>
61 </when>
62 <when value="history"> <!-- FIX ME!!!! -->
63 <param name="input_variant" type="data" format="vcf" label="Variant file to filter" help="-V,--variant &amp;lt;variant&amp;gt;" />
64 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
65 </when>
66 </conditional>
67
68
<!-- Repeatable filter definitions. Each entry supplies a filter expression,
     a filter name, and a boolean whose truevalue/falsevalue ("genotypeFilter"
     or "filter") is used by the command template as the prefix of the
     Expression and Name option names.
     The sanitizer on filter_expression starts from string.printable as the
     valid character set, removes the apostrophe, and starts with an empty
     mapping; the command template wraps the expression in apostrophes. -->
69 <repeat name="variant_filters" title="Variant Filters">
70 <param name="filter_expression" value="AB &lt; 0.2 || MQ0 &gt; 50" type="text" label="Filter expression" help="JEXL formatted expressions (-filter,--filterExpression &amp;lt;filterExpression&amp;gt;)">
71 <sanitizer>
72 <valid initial="string.printable">
73 <remove value="&apos;"/>
74 </valid>
75 <mapping initial="none"/>
76 </sanitizer>
77 </param>
78 <param name="filter_name" value="custom_filter" type="text" label="Filter name" help="-filterName,--filterName &amp;lt;filterName&amp;gt;"/>
79 <param name="is_genotype_filter" type="boolean" truevalue="genotypeFilter" falsevalue="filter" label="Use filter at the individual sample level" help="Use -G_filter,--genotypeFilterExpression &amp;lt;genotypeFilterExpression&amp;gt; and -G_filterName,--genotypeFilterName &amp;lt;genotypeFilterName&amp;gt; for filter type" />
80 </repeat>
81
82
83
<!-- Optional mask reference-ordered data (ROD) file. "set_mask" is selected
     by default and exposes the mask dataset, its name and its extension;
     "exclude_mask" adds no parameters. The command template emits the mask
     binding and the maskExtension/maskName options only for "set_mask", and
     it also uses mask_rod_name inside the binding and the input name. -->
84 <conditional name="mask_rod_bind_type">
85 <param name="mask_rod_bind_type_selector" type="select" label="Provide a Mask reference-ordered data file">
86 <option value="set_mask" selected="True">Set mask</option>
87 <option value="exclude_mask">Don't set mask</option>
88 </param>
89 <when value="exclude_mask">
90 <!-- Do nothing here -->
91 </when>
92 <when value="set_mask">
93 <param name="input_mask_rod" type="data" format="bed,gatk_dbsnp,vcf" label="Mask ROD file" help="--mask &amp;lt;mask&amp;gt;" />
94 <param name="mask_rod_name" type="text" value="Mask" label="Mask Name" help="-maskName,--maskName &amp;lt;maskName&amp;gt;"/>
95 <param name="mask_extension" type="integer" value="0" label="Mask Extension" help="-maskExtend,--maskExtension &amp;lt;maskExtension&amp;gt;"/>
96 </when>
97 </conditional>
98
99
100 <expand macro="gatk_param_type_conditional" />
101
<!-- Optional SNP clustering. "do_not_cluster_snp" is selected by default and
     adds no parameters. "cluster_snp" exposes cluster_size (default 3) and
     cluster_window_size (default 0), which the command template passes as the
     clusterSize and clusterWindowSize options. -->
102 <conditional name="cluster_snp_type">
103 <param name="cluster_snp_type_selector" type="select" label="Cluster SNPs">
104 <option value="cluster_snp">Cluster SNPs</option>
105 <option value="do_not_cluster_snp" selected="True">Do not cluster SNPs</option>
106 </param>
107 <when value="do_not_cluster_snp">
108 <!-- Do nothing here -->
109 </when>
110 <when value="cluster_snp">
111 <param name="cluster_size" type="integer" value="3" label="The number of SNPs which make up a cluster" help="-cluster,--clusterSize &amp;lt;clusterSize&amp;gt;"/>
112 <param name="cluster_window_size" type="integer" value="0" label="The window size (in bases) in which to evaluate clustered SNPs" help="-window,--clusterWindowSize &amp;lt;clusterWindowSize&amp;gt;"/>
113 </when>
114 </conditional>
115
<!-- Boolean flag: its truevalue is the literal GATK option and its falsevalue
     is the empty string; the command template passes whichever value applies
     in its final -p group. -->
116 <param name="missing_values_in_expressions_should_evaluate_as_failing" type="boolean" truevalue="--missingValuesInExpressionsShouldEvaluateAsFailing" falsevalue="" label="Should missing values be considered failing the expression" help="--missingValuesInExpressionsShouldEvaluateAsFailing" />
117
118 </inputs>
<!-- Two output datasets: output_vcf (vcf), which the command template passes
     to GATK as its output file, and output_log (txt), which receives the
     wrapper's stdout. Both labels are built from the tool name and the
     on_string of the inputs. -->
119 <outputs>
120 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (Variant File)" />
121 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
122 </outputs>
<!-- Single functional test: history reference (phiX.fasta), one filter named
     Galaxy_filter, a BED mask named ".", basic GATK parameters and no SNP
     clustering. The log output is checked with compare="contains".
     NOTE(review): the expected output_vcf is the same file that is used as
     input_variant, with up to 4 differing lines allowed; confirm this is the
     intended comparison.
     NOTE(review): is_genotype_filter and
     missing_values_in_expressions_should_evaluate_as_failing are listed
     without a value attribute; presumably the parameter defaults apply;
     confirm. -->
123 <tests>
124 <test>
125 <param name="reference_source_selector" value="history" />
126 <param name="ref_file" value="phiX.fasta" ftype="fasta" />
127 <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
128 <param name="filter_expression" value="MQ &lt; 37.74 || MQ0 &gt; 50" />
129 <param name="filter_name" value="Galaxy_filter" />
130 <param name="is_genotype_filter" />
131 <param name="mask_rod_bind_type_selector" value="set_mask" />
132 <param name="input_mask_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" />
133 <param name="mask_rod_name" value="." />
134 <param name="mask_extension" value="0" />
135 <param name="gatk_param_type_selector" value="basic" />
136 <param name="cluster_snp_type_selector" value="do_not_cluster_snp" />
137 <param name="missing_values_in_expressions_should_evaluate_as_failing" />
138 <output name="output_vcf" file="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" lines_diff="4" />
139 <output name="output_log" file="gatk/gatk_variant_filtration/gatk_variant_filtration_out_1.log.contains" compare="contains" />
140 </test>
141 </tests>
142 <help>
143 **What it does**
144
145 Filters variant calls using a number of user-selectable, parameterizable criteria.
146
147 For more information on using the VariantFiltration module, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_filters_VariantFiltration.html&gt;`_.
148
149 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.
150
151 If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.
152
153 ------
154
155 **Inputs**
156
157 GenomeAnalysisTK: VariantFiltration accepts a VCF input file.
158
159
160 **Outputs**
161
162 The output is in VCF format.
163
164 Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.
165
166 -------
167
168 **Settings**::
169
170
171 filterExpression One or more expressions used with INFO fields to filter (see wiki docs for more info)
172 filterName Names to use for the list of filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered
173 genotypeFilterExpression One or more expressions used with FORMAT (sample/genotype-level) fields to filter (see wiki docs for more info)
174 genotypeFilterName Names to use for the list of sample/genotype filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered
175 clusterSize The number of SNPs which make up a cluster (see also --clusterWindowSize); [default:3]
176 clusterWindowSize The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1); [default:0]
177 maskName The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']
178 missingValuesInExpressionsShouldEvaluateAsFailing When evaluating the JEXL expressions, should missing values be considered failing the expression (by default they are considered passing)?
179
180 @CITATION_SECTION@
181 </help>
182 </tool>