comparison haplotype_caller.xml @ 2:8bcc13094767 draft

Uploaded
author iuc
date Sat, 18 Jan 2014 07:21:33 -0500
parents 340633249b3d
children f244b8209eb8
comparison
equal deleted inserted replaced
1:f760c0de8e3a 2:8bcc13094767
5 <import>gatk2_macros.xml</import> 5 <import>gatk2_macros.xml</import>
6 </macros> 6 </macros>
7 <command interpreter="python"> 7 <command interpreter="python">
8 gatk2_wrapper.py 8 gatk2_wrapper.py
9 --stdout "${output_log}" 9 --stdout "${output_log}"
10 -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" 10 #for $i, $input_bam in enumerate( $reference_source.input_bams ):
11 #if str( $reference_source.input_bam.metadata.bam_index ) != "None": 11 -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}"
12 -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index 12 #if str( $input_bam.input_bam.metadata.bam_index ) != "None":
13 #end if 13 -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index
14 #end if
15 #end for
14 -p ' 16 -p '
15 @JAR_PATH@ 17 @JAR_PATH@
16 -T "HaplotypeCaller" 18 -T "HaplotypeCaller"
17 -o "${output_vcf}" 19 -o "${output_vcf}"
18 20
19 \$GATK2_SITE_OPTIONS 21 \$GATK2_SITE_OPTIONS
20 22
21 @THREADS@ 23 --num_cpu_threads_per_data_thread \${GALAXY_SLOTS:-4}
22 24
23 #if $reference_source.reference_source_selector != "history": 25 #if $reference_source.reference_source_selector != "history":
24 -R "${reference_source.ref_file.fields.path}" 26 -R "${reference_source.ref_file.fields.path}"
25 #end if 27 #end if
26 #if str($input_recal) != 'None': 28 #if str($input_recal) != 'None':
27 --BQSR "${input_recal}" 29 --BQSR "${input_recal}"
28 #end if 30 #end if
29 ' 31 '
32 @DBSNP_OPTIONS@
30 #include source=$standard_gatk_options# 33 #include source=$standard_gatk_options#
31 34
32 ##start analysis specific options 35 ##start analysis specific options
33 #if $analysis_param_type.analysis_param_type_selector == "advanced": 36 #if $analysis_param_type.analysis_param_type_selector == "advanced":
34 -p ' 37 -p '
35 #if $analysis_param_type.p_nonref_model.__str__ != "None" and len($analysis_param_type.p_nonref_model.__str__) > 0:
36 --p_nonref_model $analysis_param_type.p_nonref_model
37 #end if
38 #if $analysis_param_type.heterozygosity.__str__.strip() != '': 38 #if $analysis_param_type.heterozygosity.__str__.strip() != '':
39 --heterozygosity $analysis_param_type.heterozygosity 39 --heterozygosity $analysis_param_type.heterozygosity
40 #end if 40 #end if
41 --genotyping_mode "${analysis_param_type.genotyping_mode_type.genotyping_mode}" 41 --genotyping_mode "${analysis_param_type.genotyping_mode_type.genotyping_mode}"
42 #if str( $analysis_param_type.genotyping_mode_type.genotyping_mode ) == 'GENOTYPE_GIVEN_ALLELES': 42 #if str( $analysis_param_type.genotyping_mode_type.genotyping_mode ) == 'GENOTYPE_GIVEN_ALLELES':
43 --alleles "${analysis_param_type.genotyping_mode_type.input_alleles_rod}" 43 --alleles "${analysis_param_type.genotyping_mode_type.input_alleles_rod}"
44 #end if 44 #end if
45 #if $analysis_param_type.output_mode.__str__ != "None" and len($analysis_param_type.output_mode.__str__) > 0: 45 #if not $analysis_param_type.emitRefConfidence is None:
46 --output_mode $analysis_param_type.output_mode 46 --emitRefConfidence $analysis_param_type.emitRefConfidence
47 #end if 47 #end if
48 48
49 ## files 49 ## files
50 #if str($analysis_param_type.activeRegionIn) != 'None': 50 #if str($analysis_param_type.activeRegionIn) != 'None':
51 --activeRegionIn "$analysis_param_type.activeRegionIn" 51 --activeRegionIn "$analysis_param_type.activeRegionIn"
52 #end if 52 #end if
53 #if str($analysis_param_type.comp) != 'None': 53 #if str($analysis_param_type.comp) != 'None':
54 --comp "$analysis_param_type.comp" 54 --comp "$analysis_param_type.comp"
55 #end if
56 #if str($analysis_param_type.dbsnp) != 'None':
57 --dbsnp "$analysis_param_type.dbsnp"
58 #end if 55 #end if
59 ## 56 ##
60 #if str( $analysis_param_type.annotation ) != "None": 57 #if str( $analysis_param_type.annotation ) != "None":
61 #for $annotation in str( $analysis_param_type.annotation.fields.gatk_value ).split( ','): 58 #for $annotation in str( $analysis_param_type.annotation.fields.gatk_value ).split( ','):
62 --annotation "${annotation}" 59 --annotation "${annotation}"
78 75
79 ## value settings 76 ## value settings
80 #if $analysis_param_type.contamination_fraction_to_filter.__str__.strip() != '': 77 #if $analysis_param_type.contamination_fraction_to_filter.__str__.strip() != '':
81 --contamination_fraction_to_filter $analysis_param_type.contamination_fraction_to_filter 78 --contamination_fraction_to_filter $analysis_param_type.contamination_fraction_to_filter
82 #end if 79 #end if
83 #if $analysis_param_type.downsampleRegion.__str__.strip() != '':
84 --downsampleRegion $analysis_param_type.downsampleRegion
85 #end if
86 #if $analysis_param_type.minPruning.__str__.strip() != '': 80 #if $analysis_param_type.minPruning.__str__.strip() != '':
87 --minPruning $analysis_param_type.minPruning 81 --minPruning $analysis_param_type.minPruning
88 #end if 82 #end if
89 #if $analysis_param_type.standard_min_confidence_threshold_for_calling.__str__.strip() != '': 83 #if $analysis_param_type.standard_min_confidence_threshold_for_calling.__str__.strip() != '':
90 --standard_min_confidence_threshold_for_calling $analysis_param_type.standard_min_confidence_threshold_for_calling 84 --standard_min_confidence_threshold_for_calling $analysis_param_type.standard_min_confidence_threshold_for_calling
97 #end if 91 #end if
98 #if $analysis_param_type.max_alternate_alleles.__str__.strip() != '': 92 #if $analysis_param_type.max_alternate_alleles.__str__.strip() != '':
99 --max_alternate_alleles $analysis_param_type.max_alternate_alleles 93 --max_alternate_alleles $analysis_param_type.max_alternate_alleles
100 #end if 94 #end if
101 ## mode selections 95 ## mode selections
102 #if $analysis_param_type.genotyping_mode.__str__ != "None" and len($analysis_param_type.genotyping_mode.__str__) > 0: 96
103 --genotyping_mode $analysis_param_type.genotyping_mode
104 #end if
105 #if $analysis_param_type.pair_hmm_implementation.__str__ != "None" and len($analysis_param_type.pair_hmm_implementation.__str__) > 0: 97 #if $analysis_param_type.pair_hmm_implementation.__str__ != "None" and len($analysis_param_type.pair_hmm_implementation.__str__) > 0:
106 --pair_hmm_implementation $analysis_param_type.pair_hmm_implementation 98 --pair_hmm_implementation $analysis_param_type.pair_hmm_implementation
107 #end if 99 #end if
108 ## optional outputs 100 ## optional outputs
109 #if $analysis_param_type.activeRegionOut: 101 #if $analysis_param_type.activeRegionOut:
119 $analysis_param_type.debug 111 $analysis_param_type.debug
120 ' 112 '
121 #end if 113 #end if
122 </command> 114 </command>
123 <inputs> 115 <inputs>
124 <param name="input_recal" type="data" format="gatk_report" optional="true" label="Covariates table recalibration file" help="-BQSR,--BQSR &amp;lt;recal_file&amp;gt;" > 116 <param name="input_recal" type="data" format="gatk_report" optional="true" label="Covariates table recalibration file" help="The input covariates table file which enables on-the-fly base quality score recalibration. Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. Please be aware that one should only run recalibration with the covariates file created on the same input bam(s) (-BQSR,--BQSR &amp;lt;recal_file&amp;gt;)" />
125 <help>The input covariates table file which enables on-the-fly base quality score recalibration.
126 Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool.
127 Please be aware that one should only run recalibration with the covariates file created on the same input bam(s).
128 </help>
129 </param>
130 <conditional name="reference_source"> 117 <conditional name="reference_source">
131 <expand macro="reference_source_selector_param" /> 118 <expand macro="reference_source_selector_param" />
132 <when value="cached"> 119 <when value="cached">
133 <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &amp;lt;input_file&amp;gt;"> 120 <repeat name="input_bams" title="BAM file" min="1" help="-I,--input_file &amp;lt;input_file&amp;gt;">
134 <validator type="unspecified_build" /> 121 <param name="input_bam" type="data" format="bam" label="BAM file">
135 <validator type="dataset_metadata_in_data_table" table_name="gatk2_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> 122 <validator type="unspecified_build" />
136 </param> 123 <validator type="dataset_metadata_in_data_table" table_name="gatk2_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
124 </param>
125 </repeat>
137 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" > 126 <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" >
138 <options from_data_table="gatk2_picard_indexes"> 127 <options from_data_table="gatk2_picard_indexes">
139 <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> 128 <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> does not yet work in a repeat...-->
140 </options> 129 </options>
141 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> 130 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
142 </param> 131 </param>
143 </when> 132 </when>
144 <when value="history"> 133 <when value="history">
145 <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &amp;lt;input_file&amp;gt;" /> 134 <repeat name="input_bams" title="BAM file" min="1" help="-I,--input_file &amp;lt;input_file&amp;gt;">
146 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;"> 135 <param name="input_bam" type="data" format="bam" label="BAM file" />
147 <options> 136 </repeat>
148 <filter type="data_meta" key="dbkey" ref="input_bam" /> 137 <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
149 </options>
150 </param>
151 </when> 138 </when>
152 </conditional> 139 </conditional>
140 <expand macro="dbsnp_param" />
153 141
154 <expand macro="gatk_param_type_conditional" /> 142 <expand macro="gatk_param_type_conditional" />
155 143
156 <conditional name="analysis_param_type"> 144 <conditional name="analysis_param_type">
157 <param name="analysis_param_type_selector" type="select" label="Basic or Advanced Analysis options"> 145 <param name="analysis_param_type_selector" type="select" label="Basic or Advanced Analysis options">
209 197
210 <param name="comp" type="data" format="vcf" optional="true" label="comp" help="--comp / -comp comparison VCF file"/> 198 <param name="comp" type="data" format="vcf" optional="true" label="comp" help="--comp / -comp comparison VCF file"/>
211 <param name="contamination_fraction_to_filter" type="float" value="0.05" optional="true" label="contamination_fraction_to_filter" help="--contamination_fraction_to_filter / -contamination Fraction of contamination in sequencing data (for all samples) to aggressively remove"> 199 <param name="contamination_fraction_to_filter" type="float" value="0.05" optional="true" label="contamination_fraction_to_filter" help="--contamination_fraction_to_filter / -contamination Fraction of contamination in sequencing data (for all samples) to aggressively remove">
212 <validator type="in_range" message="value between 0.00 and 1.00" min="0" max="1"/> 200 <validator type="in_range" message="value between 0.00 and 1.00" min="0" max="1"/>
213 </param> 201 </param>
214 <param name="dbsnp" type="data" format="vcf" optional="true" label="dbsnp" help="--dbsnp / -D dbSNP file"/>
215 <param name="debug" type="boolean" checked="False" truevalue="-debug" falsevalue="" label="debug" help="--debug / -debug If specified, print out very verbose debug information about each triggering active region"/> 202 <param name="debug" type="boolean" checked="False" truevalue="-debug" falsevalue="" label="debug" help="--debug / -debug If specified, print out very verbose debug information about each triggering active region"/>
216 <param name="downsampleRegion" type="integer" value="1000" optional="true" label="downsampleRegion" help="--downsampleRegion / -dr coverage, per-sample, to downsample each active region to"/>
217 203
218 <conditional name="genotyping_mode_type"> 204 <conditional name="genotyping_mode_type">
219 <param name="genotyping_mode" type="select" label="How to determine the alternate allele to use for genotyping" help="-gt_mode,--genotyping_mode &amp;lt;genotyping_mode&amp;gt;"> 205 <param name="genotyping_mode" type="select" label="How to determine the alternate allele to use for genotyping" help="-gt_mode,--genotyping_mode &amp;lt;genotyping_mode&amp;gt;">
220 <option value="DISCOVERY" selected="True">DISCOVERY</option> 206 <option value="DISCOVERY" selected="True">DISCOVERY</option>
221 <option value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option> 207 <option value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option>
232 <param name="graphOutput" type="boolean" checked="False" truevalue="" falsevalue="" label="graphOutput" help="--graphOutput / -graph File to which debug assembly graph information should be written"/> 218 <param name="graphOutput" type="boolean" checked="False" truevalue="" falsevalue="" label="graphOutput" help="--graphOutput / -graph File to which debug assembly graph information should be written"/>
233 <param name="heterozygosity" type="float" value="0.0010" optional="true" label="heterozygosity" help="--heterozygosity / -hets Heterozygosity value used to compute prior likelihoods for any locus"/> 219 <param name="heterozygosity" type="float" value="0.0010" optional="true" label="heterozygosity" help="--heterozygosity / -hets Heterozygosity value used to compute prior likelihoods for any locus"/>
234 <param name="minPruning" type="integer" value="1" optional="true" label="minPruning" help="--minPruning / -minPruning The minimum allowed pruning factor in assembly graph. Paths with &lt;= X supporting kmers are pruned from the graph"> 220 <param name="minPruning" type="integer" value="1" optional="true" label="minPruning" help="--minPruning / -minPruning The minimum allowed pruning factor in assembly graph. Paths with &lt;= X supporting kmers are pruned from the graph">
235 <validator type="in_range" message="value between 0 and 127" min="0" max="127"/> 221 <validator type="in_range" message="value between 0 and 127" min="0" max="127"/>
236 </param> 222 </param>
237 <param name="output_mode" type="select" optional="true" label="output_mode" help="--output_mode / -out_mode Specifies which type of calls we should output"> 223 <!-- http://www.broadinstitute.org/gatk/guide/article?id=2940 -->
238 <option value="EMIT_VARIANTS_ONLY" selected="True">EMIT_VARIANTS_ONLY</option> 224 <param name="emitRefConfidence" type="select" optional="true" label="Output confidence estimates" help="Emitting a per-bp or summarized confidence estimate for a site being strictly homozygous-reference (--emitRefConfidence)">
239 <option value="EMIT_ALL_CONFIDENT_SITES">EMIT_ALL_CONFIDENT_SITES</option> 225 <option value="NONE" selected="True">don't emit anything</option>
240 <option value="EMIT_ALL_SITES">EMIT_ALL_SITES</option> 226 <option value="BP_RESOLUTION">BP_RESOLUTION (emit detailed information for each BP)</option>
227 <option value="GVCF">GVCF (emit a block summarized version of the BP_RESOLUTION data)</option>
241 </param> 228 </param>
242 <param name="pair_hmm_implementation" type="select" optional="true" label="pair_hmm_implementation" help="--pair_hmm_implementation / -pairHMM The PairHMM implementation to use for genotype likelihood calculations"> 229 <param name="pair_hmm_implementation" type="select" optional="true" label="pair_hmm_implementation" help="--pair_hmm_implementation / -pairHMM The PairHMM implementation to use for genotype likelihood calculations">
243 <option value="EXACT">EXACT</option> 230 <option value="EXACT">EXACT</option>
244 <option value="ORIGINAL">ORIGINAL</option> 231 <option value="ORIGINAL">ORIGINAL</option>
245 <option value="CACHING">CACHING</option> 232 <option value="CACHING">CACHING</option>
250 <param name="useAllelesTrigger" type="boolean" checked="False" truevalue="-allelesTrigger" falsevalue="" label="useAllelesTrigger" help="--useAllelesTrigger / -allelesTrigger If specified, use additional trigger on variants found in an external alleles file"/> 237 <param name="useAllelesTrigger" type="boolean" checked="False" truevalue="-allelesTrigger" falsevalue="" label="useAllelesTrigger" help="--useAllelesTrigger / -allelesTrigger If specified, use additional trigger on variants found in an external alleles file"/>
251 <param name="fullHaplotype" type="boolean" checked="False" truevalue="-fullHaplotype" falsevalue="" label="fullHaplotype" help="--fullHaplotype / -fullHaplotype If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference"/> 238 <param name="fullHaplotype" type="boolean" checked="False" truevalue="-fullHaplotype" falsevalue="" label="fullHaplotype" help="--fullHaplotype / -fullHaplotype If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference"/>
252 <param name="gcpHMM" type="integer" value="10" optional="true" label="gcpHMM" help="--gcpHMM / -gcpHMM Flat gap continuation penalty for use in the Pair HMM"/> 239 <param name="gcpHMM" type="integer" value="10" optional="true" label="gcpHMM" help="--gcpHMM / -gcpHMM Flat gap continuation penalty for use in the Pair HMM"/>
253 <param name="genotypeFullActiveRegion" type="boolean" checked="False" truevalue="-genotypeFullActiveRegion" falsevalue="" label="genotypeFullActiveRegion" help="--genotypeFullActiveRegion / -genotypeFullActiveRegion If specified, alternate alleles are considered to be the full active region for the purposes of genotyping"/> 240 <param name="genotypeFullActiveRegion" type="boolean" checked="False" truevalue="-genotypeFullActiveRegion" falsevalue="" label="genotypeFullActiveRegion" help="--genotypeFullActiveRegion / -genotypeFullActiveRegion If specified, alternate alleles are considered to be the full active region for the purposes of genotyping"/>
254 <param name="max_alternate_alleles" type="integer" value="6" optional="true" label="max_alternate_alleles" help="--max_alternate_alleles / -maxAltAlleles Maximum number of alternate alleles to genotype"/> 241 <param name="max_alternate_alleles" type="integer" value="6" optional="true" label="max_alternate_alleles" help="--max_alternate_alleles / -maxAltAlleles Maximum number of alternate alleles to genotype"/>
255 <param name="p_nonref_model" type="select" optional="true" label="p_nonref_model" help="--p_nonref_model / -pnrm Non-reference probability calculation model to employ">
256 <option value="EXACT_INDEPENDENT" selected="True">EXACT_INDEPENDENT experimental implementation - for testing only</option>
257 <option value="EXACT_REFERENCE">EXACT_REFERENCE reference implementation of multi-allelic EXACT model. Extremely slow for many alternate alleles</option>
258 <option value="EXACT_ORIGINAL">EXACT_ORIGINAL original biallelic exact model, for testing only</option>
259 <option value="EXACT_GENERAL_PLOIDY">implementation that supports any sample ploidy</option>
260 </param>
261
262 </when> 242 </when>
263 </conditional> 243 </conditional>
264 </inputs> 244 </inputs>
265 <outputs> 245 <outputs>
266 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (VCF)" /> 246 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (VCF)" />
321 annotation One or more specific annotations to apply to variant calls 301 annotation One or more specific annotations to apply to variant calls
322 comp comparison VCF file 302 comp comparison VCF file
323 contamination Fraction of contamination in sequencing data (for all samples) to aggressively remove 303 contamination Fraction of contamination in sequencing data (for all samples) to aggressively remove
324 dbsnp dbSNP file 304 dbsnp dbSNP file
325 debug If specified, print out very verbose debug information about each triggering active region 305 debug If specified, print out very verbose debug information about each triggering active region
326 downsampleRegion coverage, per-sample, to downsample each active region to
327 excludeAnnotation One or more specific annotations to exclude 306 excludeAnnotation One or more specific annotations to exclude
328 genotyping_mode Specifies how to determine the alternate alleles to use for genotyping 307 genotyping_mode Specifies how to determine the alternate alleles to use for genotyping
329 graphOutput File to which debug assembly graph information should be written 308 graphOutput File to which debug assembly graph information should be written
330 group One or more classes/groups of annotations to apply to variant calls 309 group One or more classes/groups of annotations to apply to variant calls
331 heterozygosity Heterozygosity value used to compute prior likelihoods for any locus 310 heterozygosity Heterozygosity value used to compute prior likelihoods for any locus
332 minPruning The minimum allowed pruning factor in assembly graph. Paths with this many or fewer supporting kmers are pruned from the graph 311 minPruning The minimum allowed pruning factor in assembly graph. Paths with this many or fewer supporting kmers are pruned from the graph
333 output_mode Specifies which type of calls we should output
334 pair_hmm_implementation The PairHMM implementation to use for genotype likelihood calculations 312 pair_hmm_implementation The PairHMM implementation to use for genotype likelihood calculations
335 stand_call_conf The minimum phred-scaled confidence threshold at which variants should be called 313 stand_call_conf The minimum phred-scaled confidence threshold at which variants should be called
336 stand_emit_conf The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold) 314 stand_emit_conf The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)
337 useAllelesTrigger If specified, use additional trigger on variants found in an external alleles file 315 useAllelesTrigger If specified, use additional trigger on variants found in an external alleles file
338 fullHaplotype If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference 316 fullHaplotype If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference
339 gcpHMM Flat gap continuation penalty for use in the Pair HMM 317 gcpHMM Flat gap continuation penalty for use in the Pair HMM
340 genotypeFullActiveRegion If specified, alternate alleles are considered to be the full active region for the purposes of genotyping 318 genotypeFullActiveRegion If specified, alternate alleles are considered to be the full active region for the purposes of genotyping
341 max_alternate_alleles Maximum number of alternate alleles to genotype 319 max_alternate_alleles Maximum number of alternate alleles to genotype
342 p_nonref_model Non-reference probability calculation model to employ
343
344 ------
345 320
346 @CITATION_SECTION@ 321 @CITATION_SECTION@
347 </help> 322 </help>
348 </tool> 323 </tool>