annotate variant_select.xml @ 15:01ff8dd37d4d draft default tip

Uploaded
author lz_hust
date Sat, 01 Jun 2019 07:20:41 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
15
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
1 <tool id="gatk2_variant_select" name="Select Variants" version="@VERSION@.2">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
2 <description>from VCF files</description>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
3 <macros>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
4 <import>gatk2_macros.xml</import>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
5 </macros>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
6 <expand macro="requirements" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
7 <expand macro="version_command" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
8 <command interpreter="python">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
9 #from binascii import hexlify
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
10
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
11 gatk2_wrapper.py
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
12 --stdout "${output_log}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
13 -d "--variant:variant,%(file_type)s" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
14 -p '
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
15 @JAR_PATH@
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
16 -T "SelectVariants"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
17 \$GATK2_SITE_OPTIONS
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
18
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
19 @THREADS@
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
20 -o "${output_vcf}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
21
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
22 #if $reference_source.reference_source_selector != "history":
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
23 -R "${reference_source.ref_file.fields.path}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
24 #end if
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
25 '
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
26 -p '
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
27 #if $input_concordance:
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
28 --concordance "${input_concordance}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
29 #end if
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
30 #if $input_discordance:
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
31 --discordance "${input_discordance}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
32 #end if
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
33
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
34 #for $exclude_sample_name in $exclude_sample_name_repeat:
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
35 --exclude_sample_name "${exclude_sample_name.exclude_sample_name}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
36 #end for
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
37
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
38 ${exclude_filtered}
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
39
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
40 #for $sample_name in $sample_name_repeat:
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
41 --sample_name "${sample_name.sample_name}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
42 #end for
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
43 '
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
44
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
45 #for $select_expressions in $select_expressions_repeat:
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
46 #set $select_expression = "--select_expressions '%s'" % ( str( $select_expressions.select_expressions ) )
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
47 -o '${ hexlify( $select_expression ) }'
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
48 #end for
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
49
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
50 ##start tool specific options
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
51 #if str( $analysis_param_type.analysis_param_type_selector ) == 'advanced':
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
52 -p '
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
53 #for $esf in $analysis_param_type.exclude_sample_file:
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
54 --exclude_sample_file "${esf}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
55 #end for
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
56
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
57 #for $sf in $analysis_param_type.sample_file:
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
58 --sample_file "${sf}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
59 #end for
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
60
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
61 #if $analysis_param_type.input_keep_ids:
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
62 --keepIDs "${analysis_param_type.input_keep_ids}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
63 #end if
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
64
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
65 ${analysis_param_type.keep_original_AC}
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
66
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
67 ${analysis_param_type.mendelian_violation}
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
68
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
69 --mendelianViolationQualThreshold "${analysis_param_type.mendelian_violation_qual_threshold}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
70
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
71 --remove_fraction_genotypes "${analysis_param_type.remove_fraction_genotypes}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
72
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
73 --restrictAllelesTo "${analysis_param_type.restrict_alleles_to}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
74
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
75 #if str( $analysis_param_type.select_random_type.select_random_type_selector ) == 'select_random_fraction':
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
76 --select_random_fraction "${analysis_param_type.select_random_type.select_random_fraction}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
77 #elif str( $analysis_param_type.select_random_type.select_random_type_selector ) == 'select_random_number':
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
78 --select_random_number "${analysis_param_type.select_random_type.select_random_number}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
79 #end if
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
80
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
81 #if $analysis_param_type.select_type_to_include:
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
82 #for $type_to_include in str( $analysis_param_type.select_type_to_include ).split( ',' ):
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
83 --selectTypeToInclude "${type_to_include}"
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
84 #end for
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
85 #end if
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
86
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
87 ${analysis_param_type.exclude_non_variants}
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
88 '
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
89
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
90 #for $sample_expressions in $analysis_param_type.sample_expressions_repeat:
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
91 #set $sample_expression = "--sample_expressions '%s'" % ( str( $sample_expressions.sample_expressions ) )
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
92 -o '${ hexlify( $sample_expression ) }'
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
93 #end for
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
94
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
95 #end if
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
96 ##end tool specific options
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
97
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
98 #include source=$standard_gatk_options#
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
99 </command>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
100 <inputs>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
101 <conditional name="reference_source">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
102 <expand macro="reference_source_selector_param" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
103 <when value="cached">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
104 <param name="input_variant" type="data" format="vcf" label="Variant file to select" help="-V,--variant &amp;lt;variant&amp;gt;" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
105 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
106 <options from_data_table="gatk2_picard_indexes">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
107 <filter type="data_meta" key="dbkey" ref="input_variant" column="dbkey"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
108 </options>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
109 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
110 </param>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
111 </when>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
112 <when value="history"> <!-- FIX ME!!!! -->
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
113 <param name="input_variant" type="data" format="vcf" label="Variant file to select" help="-V,--variant &amp;lt;variant&amp;gt;" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
114 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
115 </when>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
116 </conditional>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
117
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
118 <repeat name="select_expressions_repeat" title="Criteria to use when selecting the data" help="-select,--select_expressions &amp;lt;select_expressions&amp;gt;">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
119 <param name="select_expressions" type="text" label="JEXL expression">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
120 <sanitizer>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
121 <valid initial="string.printable">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
122 <remove value="&apos;"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
123 </valid>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
124 <mapping initial="none"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
125 </sanitizer>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
126 </param>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
127 </repeat>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
128
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
129 <param name="input_concordance" type="data" format="vcf" label="Output variants that were also called in this comparison track" optional="True" help="-conc,--concordance &amp;lt;concordance&amp;gt;"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
130 <param name="input_discordance" type="data" format="vcf" label="Output variants that were not called in this comparison track" optional="True" help="-disc,--discordance &amp;lt;discordance&amp;gt;"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
131
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
132 <repeat name="sample_name_repeat" title="Include Samples by name" help="-sn,--sample_name &amp;lt;sample_name&amp;gt;">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
133 <param name="sample_name" type="text" label="Include genotypes from this sample"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
134 </repeat>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
135
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
136 <repeat name="exclude_sample_name_repeat" title="Exclude Samples by name" help="-xl_sn,--exclude_sample_name &amp;lt;exclude_sample_name&amp;gt;">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
137 <param name="exclude_sample_name" type="text" label="Exclude genotypes from this sample"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
138 </repeat>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
139
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
140 <param name="exclude_filtered" type="boolean" truevalue="--excludeFiltered" falsevalue="" label="Don't include filtered loci in the analysis" help="-ef,--excludeFiltered" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
141
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
142 <expand macro="gatk_param_type_conditional" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
143
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
144 <expand macro="analysis_type_conditional">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
145
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
146 <param name="exclude_sample_file" type="data" format="txt" multiple="True" label="Exclude Samples by file" help="File containing a list of samples (one per line) to exclude (-xl_sf,--exclude_sample_file &amp;lt;exclude_sample_file&amp;gt;)"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
147
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
148 <param name="sample_file" type="data" format="txt" multiple="True" label="Samples by file" help="File containing a list of samples (one per line) to include (-sf,--sample_file &amp;lt;sample_file&amp;gt;)"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
149
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
150 <param name="input_keep_ids" type="data" format="txt" label="Only emit sites whose ID is found in this file" optional="True" help="-IDs,--keepIDs &amp;lt;keepIDs&amp;gt;"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
151
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
152 <param name="keep_original_AC" type="boolean" truevalue="--keepOriginalAC" falsevalue="" label="Don't update the AC, AF, or AN values in the INFO field after selecting" help="-keepOriginalAC,--keepOriginalAC" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
153
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
154 <param name="mendelian_violation" type="boolean" truevalue="--mendelianViolation" falsevalue="" label="output mendelian violation sites only" help="-mv,--mendelianViolation" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
155
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
156 <param name="mendelian_violation_qual_threshold" type="float" label="Minimum genotype QUAL score for each trio member required to accept a site as a mendelian violation" value="0" help="-mvq,--mendelianViolationQualThreshold &amp;lt;mendelianViolationQualThreshold&amp;gt;" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
157
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
158 <param name="remove_fraction_genotypes" type="float" label="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall" value="0" min="0" max="1" help="-fractionGenotypes,--remove_fraction_genotypes &amp;lt;remove_fraction_genotypes&amp;gt;" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
159
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
160 <param name="restrict_alleles_to" type="select" label="Select only variants of a particular allelicity" help="-restrictAllelesTo,--restrictAllelesTo &amp;lt;restrictAllelesTo&amp;gt;">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
161 <option value="ALL" selected="True">ALL</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
162 <option value="MULTIALLELIC">MULTIALLELIC</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
163 <option value="BIALLELIC">BIALLELIC</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
164 </param>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
165
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
166 <repeat name="sample_expressions_repeat" title="Regular expression to select many samples from the ROD tracks provided" help="-se,--sample_expressions &amp;lt;sample_expressions&amp;gt;">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
167 <param name="sample_expressions" type="text" label="Regular expression">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
168 <sanitizer>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
169 <valid initial="string.printable">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
170 <remove value="&apos;"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
171 </valid>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
172 <mapping initial="none"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
173 </sanitizer>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
174 </param>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
175 </repeat>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
176
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
177 <conditional name="select_random_type">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
178 <param name="select_random_type_selector" type="select" label="Select a random subset of variants">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
179 <option value="select_all" selected="True">Use all variants</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
180 <option value="select_random_fraction">Select random fraction</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
181 <option value="select_random_number">Select random number</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
182 </param>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
183 <when value="select_all">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
184 <!-- Do nothing here -->
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
185 </when>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
186 <when value="select_random_fraction">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
187 <param name="select_random_fraction" type="float" value="0" label="Fraction" min="0" max="1" help="-fraction,--select_random_fraction &amp;lt;select_random_fraction&amp;gt;"/>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
188 </when>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
189 <when value="select_random_number">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
190 <param name="select_random_number" type="integer" value="0" label="Count" help="-number,--select_random_number &amp;lt;select_random_number&amp;gt;" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
191 </when>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
192 </conditional>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
193
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
194 <param name="exclude_non_variants" type="boolean" truevalue="--excludeNonVariants" falsevalue="" label="Don't include loci found to be non-variant after the subsetting procedure" help="-env,--excludeNonVariants" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
195
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
196 <param name="select_type_to_include" type="select" label="Select only a certain type of variants from the input file" multiple="True" display="checkboxes" help="-selectType,--selectTypeToInclude &amp;lt;selectTypeToInclude&amp;gt;">
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
197 <option value="INDEL">INDEL</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
198 <option value="SNP">SNP</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
199 <option value="MIXED">MIXED</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
200 <option value="MNP">MNP</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
201 <option value="SYMBOLIC">SYMBOLIC</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
202 <option value="NO_VARIATION">NO_VARIATION</option>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
203 </param>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
204 </expand>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
205
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
206 </inputs>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
207 <outputs>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
208 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (Variant File)" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
209 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
210 </outputs>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
211 <tests>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
212 <test>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
213 <param name="reference_source_selector" value="history" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
214 <param name="ref_file" value="phiX.fasta" ftype="fasta" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
215 <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
216 <param name="select_expressions_repeat" value="0" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
217 <param name="input_concordance" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
218 <param name="input_discordance" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
219 <param name="exclude_sample_name_repeat" value="0" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
220 <param name="exclude_filtered" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
221 <param name="sample_name_repeat" value="0" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
222 <param name="gatk_param_type_selector" value="basic" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
223 <param name="analysis_param_type_selector" value="basic" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
224 <output name="output_vcf" file="gatk/gatk_variant_select/gatk_variant_select_out_1.vcf" lines_diff="4" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
225 <output name="output_log" file="gatk/gatk_variant_select/gatk_variant_select_out_1.log.contains" compare="contains" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
226 </test>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
227 </tests>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
228 <help>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
229 **What it does**
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
230
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
231 Often, a VCF containing many samples and/or variants will need to be subset in order to facilitate certain analyses (e.g. comparing and contrasting cases vs. controls; extracting variant or non-variant loci that meet certain requirements; displaying just a few samples in a browser like IGV; etc.). SelectVariants can be used for this purpose. Given a single VCF file, one or more samples can be extracted from the file (based on a complete sample name or a pattern match). Variants can be further selected by specifying criteria for inclusion, e.g. "DP &gt; 1000" (depth of coverage greater than 1000x) or "AF &lt; 0.25" (sites with allele frequency less than 0.25). These JEXL expressions are documented in the `Using JEXL expressions section &lt;http://gatkforums.broadinstitute.org/discussion/1255/what-are-jexl-expressions-and-how-can-i-use-them-with-the-gatk&gt;`_. One can optionally include concordance or discordance tracks for use in selecting overlapping variants.
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
232
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
233 For more information on using the SelectVariants module, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_variantutils_SelectVariants.html&gt;`_.
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
234
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
235 To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gatk/guide/topic?name=best-practices&gt;`_.
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
236
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
237 If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gatk/guide/topic?name=faqs&gt;`_.
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
238
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
239 ------
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
240
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
241 **Inputs**
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
242
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
243 GenomeAnalysisTK: SelectVariants accepts a VCF input file.
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
244
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
245
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
246 **Outputs**
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
247
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
248 The output is in VCF format.
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
249
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
250
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
251 Go `here &lt;http://www.broadinstitute.org/gatk/guide/topic?name=intro&gt;`_ for details on GATK file formats.
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
252
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
253 -------
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
254
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
255 **Settings**::
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
256
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
257
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
258 out VCFWriter stdout File to which variants should be written
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
259 variant RodBinding[VariantContext] NA Input VCF file
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
260 concordance RodBinding[VariantContext] none Output variants that were also called in this comparison track
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
261 discordance RodBinding[VariantContext] none Output variants that were not called in this comparison track
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
262 exclude_sample_file Set[File] [] File containing a list of samples (one per line) to exclude. Can be specified multiple times
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
263 exclude_sample_name Set[String] [] Exclude genotypes from this sample. Can be specified multiple times
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
264 excludeFiltered boolean false Don't include filtered loci in the analysis
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
265 excludeNonVariants boolean false Don't include loci found to be non-variant after the subsetting procedure
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
266 keepIDs File NA Only emit sites whose ID is found in this file (one ID per line)
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
267 keepOriginalAC boolean false Don't update the AC, AF, or AN values in the INFO field after selecting
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
268 mendelianViolation Boolean false Output Mendelian violation sites only
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
269 mendelianViolationQualThreshold double 0.0 Minimum genotype QUAL score for each trio member required to accept a site as a Mendelian violation
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
270 remove_fraction_genotypes double 0.0 Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
271 restrictAllelesTo NumberAlleleRestriction ALL Select only variants of a particular allelicity. Valid options are ALL (default), MULTIALLELIC or BIALLELIC
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
272 sample_expressions Set[String] NA Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
273 sample_file Set[File] NA File containing a list of samples (one per line) to include. Can be specified multiple times
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
274 sample_name Set[String] [] Include genotypes from this sample. Can be specified multiple times
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
275 select_expressions ArrayList[String] [] One or more criteria to use when selecting the data
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
276 select_random_fraction double 0.0 Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
277 select_random_number int 0 Selects a number of variants at random from the variant track
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
278 selectTypeToInclude List[Type] [] Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
279
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
280 @CITATION_SECTION@
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
281 </help>
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
282 <expand macro="citations" />
01ff8dd37d4d Uploaded
lz_hust
parents:
diff changeset
283 </tool>