comparison freebayes.xml @ 0:61fe907fc37c

Uploaded freebayes with tool dependencies
author devteam
date Mon, 02 Jul 2012 17:49:47 -0400
parents
children 838dd557c84c
comparison
equal deleted inserted replaced
-1:000000000000 0:61fe907fc37c
1 <?xml version="1.0"?>
2 <tool id="freebayes" name="FreeBayes" version="0.0.2">
3 <requirements> <!-- Packages the command template relies on: the freebayes binary and samtools (used for faidx on a history reference); both versions are pinned -->
4 <requirement type="package" version="0.9.4_9696d0ce8a962f7bb61c4791be5ce44312b81cf8">freebayes</requirement>
5 <requirement type="package" version="0.1.18">samtools</requirement>
6 </requirements>
7 <description> - Bayesian genetic variant detector</description>
8 <command>
9 ##set up input files: a history reference is symlinked to localref.fa and indexed with samtools faidx, a cached reference is used in place via its data table path, and every BAM plus its index is symlinked to localbam_N.bam and localbam_N.bam.bai
10 #set $reference_fasta_filename = "localref.fa"
11 #if str( $reference_source.reference_source_selector ) == "history":
12 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp;
13 samtools faidx "${reference_fasta_filename}" 2&gt;&amp;1 || echo "Error running samtools faidx for FreeBayes" &gt;&amp;2 &amp;&amp;
14 #else:
15 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
16 #end if
17 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
18 ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" &amp;&amp;
19 ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &amp;&amp;
20 #end for
21 ##finished setting up inputs; note that a failed faidx only prints a message to stderr, and because that echo succeeds the command chain still continues
22
23 ##start FreeBayes commandline: one bam argument per linked BAM, using the same index-based names as the links created above
24 freebayes
25 #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
26 --bam "localbam_${bam_count}.bam"
27 #end for
28 --fasta-reference "${reference_fasta_filename}"
29
30 ##outputs: the VCF dataset is always requested
31 --vcf "${output_vcf}"
32
33 ##advanced options: everything up to the final end-if is emitted only when the Advanced option set is selected
34 #if str( $options_type.options_type_selector ) == "advanced":
35 ##additional outputs: trace and failed-alleles files, each pointed at its own output dataset
36 #if $options_type.output_trace_option:
37 --trace "${output_trace}"
38 #end if
39 #if $options_type.output_failed_alleles_option:
40 --failed-alleles "${output_failed_alleles_bed}"
41 #end if
42
43 ##additional inputs: target file or single region limit, optional sample, population and CNV map files, and an optional input VCF
44 #if str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_target_file":
45 --targets "${options_type.target_limit_type.input_target_bed}"
46 #elif str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_region":
47 --region "${options_type.target_limit_type.region_chromosome}:${options_type.target_limit_type.region_start}..${options_type.target_limit_type.region_end}"
48 #end if
49 #if $options_type.input_sample_file:
50 --samples "${options_type.input_sample_file}"
51 #end if
52 #if $options_type.input_populations_file:
53 --populations "${options_type.input_populations_file}"
54 #end if
55 #if $options_type.input_cnv_map_bed:
56 --cnv-map "${options_type.input_cnv_map_bed}"
57 #end if
58 #if str( $options_type.input_variant_type.input_variant_type_selector ) == "provide_vcf":
59 --variant-input "${options_type.input_variant_type.input_variant_vcf}"
60 ${options_type.input_variant_type.only_use_input_alleles}
61 #end if
62
63 ##reporting: boolean parameters in this and later sections expand to their flag text or to an empty string, as set by truevalue and falsevalue in the inputs
64 #if str( $options_type.section_reporting_type.section_reporting_type_selector ) == "set":
65 --pvar "${options_type.section_reporting_type.pvar}"
66 ${options_type.section_reporting_type.show_reference_repeats}
67 #end if
68
69 ##population model: theta and ploidy are always passed once this section is set
70 #if str( $options_type.section_population_model_type.section_population_model_type_selector ) == "set":
71 --theta "${options_type.section_population_model_type.theta}"
72 --ploidy "${options_type.section_population_model_type.ploidy}"
73 ${options_type.section_population_model_type.pooled}
74 #end if
75
76 ##reference allele: the two quality inputs are joined into the single MQ,BQ argument
77 #if str( $options_type.use_reference_allele_type.use_reference_allele_type_selector ) == "include_reference_allele":
78 --use-reference-allele
79 ${options_type.use_reference_allele_type.diploid_reference}
80 --reference-quality "${options_type.use_reference_allele_type.reference_quality_mq},${options_type.use_reference_allele_type.reference_quality_bq}"
81 #end if
82
83 ##allele scope: max-complex-gap is optional and only passed when a value is present
84 #if str( $options_type.section_allele_scope_type.section_allele_scope_type_selector ) == "set":
85 ${options_type.section_allele_scope_type.no_snps}
86 ${options_type.section_allele_scope_type.no_indels}
87 ${options_type.section_allele_scope_type.no_mnps}
88 ${options_type.section_allele_scope_type.no_complex}
89 --use-best-n-alleles "${options_type.section_allele_scope_type.use_best_n_alleles}"
90 #if $options_type.section_allele_scope_type.max_complex_gap:
91 --max-complex-gap "${options_type.section_allele_scope_type.max_complex_gap}"
92 #end if
93 #end if
94
95 ##indel realignment: a plain boolean flag that is not wrapped in a Set conditional
96 ${options_type.left_align_indels}
97
98 ##input filters: the three quality thresholds are replaced by the no-filters flag when filters are not applied; the optional read limits are passed only when a value is present
99 #if str( $options_type.section_input_filters_type.section_input_filters_type_selector ) == "set":
100 ${options_type.section_input_filters_type.use_duplicate_reads}
101 #if str( $options_type.section_input_filters_type.no_filter_type.no_filter_type_selector ) == "apply_filters":
102 --min-mapping-quality "${options_type.section_input_filters_type.no_filter_type.min_mapping_quality}"
103 --min-base-quality "${options_type.section_input_filters_type.no_filter_type.min_base_quality}"
104 --min-supporting-quality "${options_type.section_input_filters_type.no_filter_type.min_supporting_quality_mq},${options_type.section_input_filters_type.no_filter_type.min_supporting_quality_bq}"
105 #else:
106 --no-filters
107 #end if
108 --mismatch-base-quality-threshold "${options_type.section_input_filters_type.mismatch_base_quality_threshold}"
109 #if $options_type.section_input_filters_type.read_mismatch_limit:
110 --read-mismatch-limit "${options_type.section_input_filters_type.read_mismatch_limit}"
111 #end if
112 --read-max-mismatch-fraction "${options_type.section_input_filters_type.read_max_mismatch_fraction}"
113 #if $options_type.section_input_filters_type.read_snp_limit:
114 --read-snp-limit "${options_type.section_input_filters_type.read_snp_limit}"
115 #end if
116 #if $options_type.section_input_filters_type.read_indel_limit:
117 --read-indel-limit "${options_type.section_input_filters_type.read_indel_limit}"
118 #end if
119 --indel-exclusion-window "${options_type.section_input_filters_type.indel_exclusion_window}"
120 --min-alternate-fraction "${options_type.section_input_filters_type.min_alternate_fraction}"
121 --min-alternate-count "${options_type.section_input_filters_type.min_alternate_count}"
122 --min-alternate-qsum "${options_type.section_input_filters_type.min_alternate_qsum}"
123 --min-alternate-total "${options_type.section_input_filters_type.min_alternate_total}"
124 --min-coverage "${options_type.section_input_filters_type.min_coverage}"
125 #end if
126
127 ##bayesian priors: three independent boolean flags
128 #if str( $options_type.section_bayesian_priors_type.section_bayesian_priors_type_selector ) == "set":
129 ${options_type.section_bayesian_priors_type.no_ewens_priors}
130 ${options_type.section_bayesian_priors_type.no_population_priors}
131 ${options_type.section_bayesian_priors_type.hwe_priors}
132 #end if
133
134 ##observation prior expectations: two independent boolean flags
135 #if str( $options_type.section_observation_prior_expectations_type.section_observation_prior_expectations_type_selector ) == "set":
136 ${options_type.section_observation_prior_expectations_type.binomial_obs_priors}
137 ${options_type.section_observation_prior_expectations_type.allele_balance_priors}
138 #end if
139
140 ##algorithmic features: the two posterior integration limits are joined into the single N,M argument; genotype-variant-threshold is optional
141 #if str( $options_type.section_algorithmic_features_type.section_algorithmic_features_type_selector ) == "set":
142 --site-selection-max-iterations "${options_type.section_algorithmic_features_type.site_selection_max_iterations}"
143 --genotyping-max-iterations "${options_type.section_algorithmic_features_type.genotyping_max_iterations}"
144 --genotyping-max-banddepth "${options_type.section_algorithmic_features_type.genotyping_max_banddepth}"
145 --posterior-integration-limits "${options_type.section_algorithmic_features_type.posterior_integration_limits_n},${options_type.section_algorithmic_features_type.posterior_integration_limits_m}"
146 ${options_type.section_algorithmic_features_type.no_permute}
147 ${options_type.section_algorithmic_features_type.exclude_unobserved_genotypes}
148 #if $options_type.section_algorithmic_features_type.genotype_variant_threshold:
149 --genotype-variant-threshold "${options_type.section_algorithmic_features_type.genotype_variant_threshold}"
150 #end if
151 ${options_type.section_algorithmic_features_type.use_mapping_quality}
152 --read-dependence-factor "${options_type.section_algorithmic_features_type.read_dependence_factor}"
153 ${options_type.section_algorithmic_features_type.no_marginals}
154 #end if
155
156 #end if
157 </command>
158 <inputs>
159 <conditional name="reference_source"> <!-- Both branches define input_bams and ref_file under the same names, so the command template reads them the same way for either source -->
160 <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
161 <option value="cached">Locally cached</option>
162 <option value="history">History</option>
163 </param>
164 <when value="cached">
165 <repeat name="input_bams" title="Sample BAM file" min="1">
166 <param name="input_bam" type="data" format="bam" label="BAM file">
167 <validator type="unspecified_build" />
168 <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="value" message="Sequences are not currently available for the specified build." />
169 </param>
170 </repeat>
171 <param name="ref_file" type="select" label="Using reference genome">
172 <options from_data_table="sam_fa_indexes">
173 <!-- Disabled because it does not yet work in a repeat: <filter type="sam_fa_indexes" key="dbkey" ref="input_bam" column="value"/> -->
174 </options>
175 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
176 </param>
177 </when>
178 <when value="history"> <!-- FIX ME!!!! (original author note; the issue is not stated). NOTE(review): unlike the cached branch, these BAM inputs declare no build validators; confirm whether that is the gap -->
179 <repeat name="input_bams" title="Sample BAM file" min="1">
180 <param name="input_bam" type="data" format="bam" label="BAM file" />
181 </repeat>
182 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
183 </when>
184 </conditional>
185
186 <conditional name="options_type">
187 <param name="options_type_selector" type="select" label="Basic or Advanced options">
188 <option value="basic" selected="True">Basic</option>
189 <option value="advanced">Advanced</option>
190 </param>
191 <when value="basic">
192 <!-- Do nothing here -->
193 </when>
194 <when value="advanced">
195
196 <!-- output: optional extra datasets; the outputs section filters the failed-alleles and trace datasets on these two booleans -->
197 <param name="output_failed_alleles_option" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="False" label="Write out failed alleles file" />
198 <param name="output_trace_option" type="boolean" truevalue="--trace" falsevalue="" checked="False" label="Write out algorithm trace file" />
199
200
201 <!-- input: optional limits and auxiliary files; the command template adds an argument only for the choices and files supplied here -->
202 <conditional name="target_limit_type">
203 <param name="target_limit_type_selector" type="select" label="Limit analysis to listed targets">
204 <option value="do_not_limit" selected="True">Do not limit</option>
205 <option value="limit_by_target_file">Limit by target file</option>
206 <option value="limit_by_region">Limit to region</option>
207 </param>
208 <when value="do_not_limit">
209 <!-- Do nothing here -->
210 </when>
211 <when value="limit_by_target_file">
212 <param name="input_target_bed" type="data" format="bed" label="Limit analysis to targets listed in the BED-format FILE." />
213 </when>
214 <when value="limit_by_region">
215 <param name="region_chromosome" type="text" label="Region Chromosome" value="" /> <!-- original note: only once? NOTE(review): the command template passes exactly one chromosome:start..end region; confirm whether several regions should be supported -->
216 <param name="region_start" type="integer" label="Region Start" value="" />
217 <param name="region_end" type="integer" label="Region End" value="" />
218 </when>
219 </conditional>
220 <param name="input_sample_file" type="data" format="txt" label="Limit analysis to samples listed (one per line) in the FILE" optional="True" />
221 <param name="input_populations_file" type="data" format="txt" label="Populations File" optional="True" />
222 <param name="input_cnv_map_bed" type="data" format="bed" label="Read a copy number map from the BED file FILE" optional="True" />
223 <conditional name="input_variant_type">
224 <param name="input_variant_type_selector" type="select" label="Provide variants file">
225 <option value="do_not_provide" selected="True">Do not provide</option>
226 <option value="provide_vcf">Provide VCF file</option>
227 </param>
228 <when value="do_not_provide">
229 <!-- Do nothing here -->
230 </when>
231 <when value="provide_vcf">
232 <param name="input_variant_vcf" type="data" format="vcf" label="Use variants reported in VCF file as input to the algorithm" />
233 <param name="only_use_input_alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="False" label="Only provide variant calls and genotype likelihoods for sites in VCF" />
234 </when>
235 </conditional>
236
237
238 <!-- reporting: the pvar threshold and the show-reference-repeats flag reach the command line only when Set is chosen -->
239 <conditional name="section_reporting_type">
240 <param name="section_reporting_type_selector" type="select" label="Set Reporting options">
241 <option value="do_not_set" selected="True">Do not set</option>
242 <option value="set">Set</option>
243 </param>
244 <when value="do_not_set">
245 <!-- do nothing here -->
246 </when>
247 <when value="set">
248 <param name="pvar" type="float" label="Report sites if the probability that there is a polymorphism at the site is greater" value="0.0001" />
249 <param name="show_reference_repeats" type="boolean" truevalue="--show-reference-repeats" falsevalue="" checked="False" label="Calculate and show information about reference repeats" />
250 </when>
251 </conditional>
252
253
254 <!-- population model: theta, ploidy and the pooled flag reach the command line only when Set is chosen -->
255 <conditional name="section_population_model_type">
256 <param name="section_population_model_type_selector" type="select" label="Set population model options">
257 <option value="do_not_set" selected="True">Do not set</option>
258 <option value="set">Set</option>
259 </param>
260 <when value="do_not_set">
261 <!-- do nothing here -->
262 </when>
263 <when value="set">
264 <param name="theta" type="float" label="expected mutation rate or pairwise nucleotide diversity among the population" value="0.001" help="This serves as the single parameter to the Ewens Sampling Formula prior model"/>
265 <param name="ploidy" type="integer" label="default ploidy for the analysis" value="2" />
266 <param name="pooled" type="boolean" truevalue="--pooled" falsevalue="" checked="False" label="Assume that samples result from pooled sequencing" help="When using this flag, set --ploidy to the number of alleles in each sample." />
267 </when>
268 </conditional>
269
270 <!-- reference allele: choosing Include makes the command template add the use-reference-allele flag and a single reference-quality argument built from the two quality values below (mapping quality first, then base quality) -->
271 <conditional name="use_reference_allele_type">
272 <param name="use_reference_allele_type_selector" type="select" label="Include the reference allele in the analysis">
273 <option value="do_not_include_reference_allele" selected="True">Do not include</option>
274 <option value="include_reference_allele">Include</option>
275 </param>
276 <when value="do_not_include_reference_allele">
277 <!-- Do nothing here -->
278 </when>
279 <when value="include_reference_allele">
280 <param name="diploid_reference" type="boolean" truevalue="--diploid-reference" falsevalue="" checked="False" label="Treat reference as diploid" />
281 <param name="reference_quality_mq" type="integer" label="Assign mapping quality" value="100" />
282 <param name="reference_quality_bq" type="integer" label="Assign base quality" value="60" />
283 </when>
284 </conditional>
285
286 <!-- allele scope: four ignore flags, the best-N allele limit, and an optional max-complex-gap that is passed only when a value is entered -->
287 <conditional name="section_allele_scope_type">
288 <param name="section_allele_scope_type_selector" type="select" label="Set allele scope options">
289 <option value="do_not_set" selected="True">Do not set</option>
290 <option value="set">Set</option>
291 </param>
292 <when value="do_not_set">
293 <!-- do nothing here -->
294 </when>
295 <when value="set">
296 <param name="no_snps" type="boolean" truevalue="--no-snps" falsevalue="" checked="False" label="Ignore SNP alleles" />
297 <param name="no_indels" type="boolean" truevalue="--no-indels" falsevalue="" checked="False" label="Ignore insertion and deletion alleles" />
298 <param name="no_mnps" type="boolean" truevalue="--no-mnps" falsevalue="" checked="False" label="Ignore multi-nucleotide polymorphisms, MNPs" />
299 <param name="no_complex" type="boolean" truevalue="--no-complex" falsevalue="" checked="False" label="Ignore complex events (composites of other classes)" />
300 <param name="use_best_n_alleles" type="integer" label="Evaluate only the best N SNP alleles" value="0" min="0" help="Ranked by sum of supporting quality scores; Set to 0 to use all" />
301 <param name="max_complex_gap" type="integer" label="Allow complex alleles with contiguous embedded matches of up to this length" value="" optional="True"/>
302 </when>
303 </conditional>
304
305 <!-- indel realignment: a single flag available directly under the Advanced options, with no Set/Do not set selector of its own -->
306 <param name="left_align_indels" type="boolean" truevalue="--left-align-indels" falsevalue="" checked="False" label="Left-realign and merge gaps embedded in reads" />
307
308 <!-- input filters: a nested selector switches between explicit quality thresholds and the no-filters flag; the remaining read and coverage limits apply in both cases -->
309 <conditional name="section_input_filters_type">
310 <param name="section_input_filters_type_selector" type="select" label="Set input filters options">
311 <option value="do_not_set" selected="True">Do not set</option>
312 <option value="set">Set</option>
313 </param>
314 <when value="do_not_set">
315 <!-- do nothing here -->
316 </when>
317 <when value="set">
318 <param name="use_duplicate_reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="False" label="Include duplicate-marked alignments in the analysis" />
319 <conditional name="no_filter_type">
320 <param name="no_filter_type_selector" type="select" label="Apply filters">
321 <option value="apply_filters" selected="True">Apply</option>
322 <option value="no_filters">Do not apply</option>
323 </param>
324 <when value="no_filters">
325 <!-- No parameters here; the command template passes the no-filters flag for this choice -->
326 </when>
327 <when value="apply_filters">
328 <param name="min_mapping_quality" type="integer" label="Exclude alignments from analysis if they have a mapping quality less than" value="30" />
329 <param name="min_base_quality" type="integer" label="Exclude alleles from analysis if their supporting base quality is less than" value="20" />
330 <param name="min_supporting_quality_mq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have mapping quality" value="0" />
331 <param name="min_supporting_quality_bq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have base quality" value="0" />
332 </when>
333 </conditional>
334 <param name="mismatch_base_quality_threshold" type="integer" label="Count mismatches toward read-mismatch-limit if the base quality of the mismatch is &gt;=" value="10" />
335 <param name="read_mismatch_limit" type="integer" label="Exclude reads with more than N mismatches where each mismatch has base quality &gt;= mismatch-base-quality-threshold" value="" optional="True" />
336 <param name="read_max_mismatch_fraction" type="float" label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality &gt;= mismatch-base-quality-threshold" value="1.0" />
337 <param name="read_snp_limit" type="integer" label="Exclude reads with more than N base mismatches, ignoring gaps with quality &gt;= mismatch-base-quality-threshold" value="" optional="True" />
338 <param name="read_indel_limit" type="integer" label="Exclude reads with more than N separate gaps" value="" optional="True" />
339 <param name="indel_exclusion_window" type="integer" label="Ignore portions of alignments this many bases from a putative insertion or deletion allele" value="0" />
340 <param name="min_alternate_fraction" type="float" label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" />
341 <param name="min_alternate_count" type="integer" label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" value="1" />
342 <param name="min_alternate_qsum" type="integer" label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" />
343 <param name="min_alternate_total" type="integer" label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" value="1" />
344 <param name="min_coverage" type="integer" label="Require at least this coverage to process a site" value="0" />
345 </when>
346 </conditional>
347
348
349 <!-- bayesian priors: three independent flags, passed to the command line only when Set is chosen -->
350 <conditional name="section_bayesian_priors_type">
351 <param name="section_bayesian_priors_type_selector" type="select" label="Set bayesian priors options">
352 <option value="do_not_set" selected="True">Do not set</option>
353 <option value="set">Set</option>
354 </param>
355 <when value="do_not_set">
356 <!-- do nothing here -->
357 </when>
358 <when value="set">
359 <param name="no_ewens_priors" type="boolean" truevalue="--no-ewens-priors" falsevalue="" checked="False" label="Turns off the Ewens' Sampling Formula component of the priors" />
360 <param name="no_population_priors" type="boolean" truevalue="--no-population-priors" falsevalue="" checked="False" label="No population priors" help="Equivalent to --pooled --no-ewens-priors" />
361 <param name="hwe_priors" type="boolean" truevalue="--hwe-priors" falsevalue="" checked="False" label="Use the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" />
362 </when>
363 </conditional>
364
365 <!-- observation prior expectations: two independent flags, passed to the command line only when Set is chosen -->
366 <conditional name="section_observation_prior_expectations_type">
367 <param name="section_observation_prior_expectations_type_selector" type="select" label="Set observation prior expectations options">
368 <option value="do_not_set" selected="True">Do not set</option>
369 <option value="set">Set</option>
370 </param>
371 <when value="do_not_set">
372 <!-- do nothing here -->
373 </when>
374 <when value="set">
375 <param name="binomial_obs_priors" type="boolean" truevalue="--binomial-obs-priors" falsevalue="" checked="False" label="Incorporate expectations about observations into the priors. Uses read placement probability, strand balance probability, and read position (5'-3') probability" />
376 <param name="allele_balance_priors" type="boolean" truevalue="--allele-balance-priors" falsevalue="" checked="False" label="Use aggregate probability of observation balance between alleles as a component of the priors. Best for observations with minimal inherent reference bias" />
377 </when>
378 </conditional>
379
380
381 <!-- algorithmic features: the two posterior integration limits are combined by the command template into one N,M argument; genotype_variant_threshold is optional and passed only when a value is entered -->
382 <conditional name="section_algorithmic_features_type">
383 <param name="section_algorithmic_features_type_selector" type="select" label="Set algorithmic features options">
384 <option value="do_not_set" selected="True">Do not set</option>
385 <option value="set">Set</option>
386 </param>
387 <when value="do_not_set">
388 <!-- do nothing here -->
389 </when>
390 <when value="set">
391 <param name="site_selection_max_iterations" type="integer" label="Uses hill-climbing algorithm to search posterior space for N iterations to determine if the site should be evaluated." value="5" help="Set to 0 to prevent use of this algorithm for site selection, and to a low integer for improved site selection at a slight performance penalty" />
392 <param name="genotyping_max_iterations" type="integer" label="Iterate no more than N times during genotyping step" value="25" />
393 <param name="genotyping_max_banddepth" type="integer" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" value="6" />
394 <param name="posterior_integration_limits_n" type="integer" label="Posterior integration limit N" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="1" />
395 <param name="posterior_integration_limits_m" type="integer" label="Posterior integration limit M" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="3" />
396 <param name="no_permute" type="boolean" truevalue="--no-permute" falsevalue="" checked="False" label="Do not scale prior probability of genotype combination given allele frequency by the number of permutations of included genotypes" />
397 <param name="exclude_unobserved_genotypes" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="False" label="Skip sample genotypings for which the sample has no supporting reads" />
398 <param name="genotype_variant_threshold" type="integer" label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample" value="" optional="True" />
399 <param name="use_mapping_quality" type="boolean" truevalue="--use-mapping-quality" falsevalue="" checked="False" label="Use mapping quality of alleles when calculating data likelihoods" />
400 <param name="read_dependence_factor" type="float" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" value="0.9" />
401 <param name="no_marginals" type="boolean" truevalue="--no-marginals" falsevalue="" checked="False" label="Do not calculate the marginal probability of genotypes. Saves time and improves scaling performance in large populations" />
402 </when>
403 </conditional>
404
405
406 </when>
407 </conditional>
408
409 </inputs>
410 <outputs> <!-- The VCF has no filter; the failed-alleles BED and trace datasets are filtered on the Advanced selector together with the matching boolean option -->
411 <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (variants)" />
412 <data format="bed" name="output_failed_alleles_bed" label="${tool.name} on ${on_string} (failed alleles)">
413 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_failed_alleles_option'] is True</filter>
414 </data>
415 <data format="txt" name="output_trace" label="${tool.name} on ${on_string} (trace)">
416 <filter>options_type['options_type_selector'] == "advanced" and options_type['output_trace_option'] is True</filter>
417 </data>
418 </outputs>
419 <tests>
420 <test> <!-- Basic-options run with a history reference (phiX.fasta) and one BAM; the VCF is checked with compare="contains". The failed-alleles and trace output checks below are commented out -->
421 <param name="reference_source_selector" value="history" />
422 <param name="ref_file" ftype="fasta" value="phiX.fasta"/>
423 <param name="input_bam" ftype="bam" value="gatk/fake_phiX_reads_1.bam"/>
424 <param name="options_type_selector" value="basic"/>
425 <output name="output_vcf" file="variant_detection/freebayes/freebayes_out_1.vcf.contains" compare="contains"/>
426 <!-- <output name="output_failed_alleles_bed" file="empty_file.dat" />
427 <output name="output_trace" file="variant_detection/freebayes/freebayes_out_1.output_trace" /> -->
428 </test>
429 </tests>
430 <help>
431 **What it does**
432
433 This tool uses FreeBayes to call SNPs given a reference sequence and a BAM alignment file.
434
435 FreeBayes is a high-performance, flexible, and open-source Bayesian genetic variant detector. It operates on BAM alignment files, which are produced by most contemporary short-read aligners.
436
437 In addition to substantial performance improvements over its predecessors (PolyBayes, GigaBayes, and BamBayes), it expands the scope of SNP and small-indel variant calling to populations of individuals with heterogeneous copy number. FreeBayes is currently under active development.
438
439 Go `here &lt;http://bioinformatics.bc.edu/marthlab/FreeBayes&gt;`_ for details on FreeBayes.
440
441 ------
442
443 **Inputs**
444
445 FreeBayes accepts an input aligned BAM file.
446
447
448 **Outputs**
449
450 The output is in the VCF format.
451
452 -------
453
454 **Settings**::
455
456 input and output:
457
458 -b --bam FILE Add FILE to the set of BAM files to be analyzed.
459 -c --stdin Read BAM input on stdin.
460 -v --vcf FILE Output VCF-format results to FILE.
461 -f --fasta-reference FILE
462 Use FILE as the reference sequence for analysis.
463 An index file (FILE.fai) will be created if none exists.
464 If neither --targets nor --region are specified, FreeBayes
465 will analyze every position in this reference.
466 -t --targets FILE
467 Limit analysis to targets listed in the BED-format FILE.
468 -r --region &lt;chrom&gt;:&lt;start_position&gt;..&lt;end_position&gt;
469 Limit analysis to the specified region, 0-base coordinates,
470 end_position not included (same as BED format).
471 -s --samples FILE
472 Limit analysis to samples listed (one per line) in the FILE.
473 By default FreeBayes will analyze all samples in its input
474 BAM files.
475 --populations FILE
476 Each line of FILE should list a sample and a population which
477 it is part of. The population-based bayesian inference model
478 will then be partitioned on the basis of the populations.
479 -A --cnv-map FILE
480 Read a copy number map from the BED file FILE, which has
481 the format:
482 reference sequence, start, end, sample name, copy number
483 ... for each region in each sample which does not have the
484 default copy number as set by --ploidy.
485 -L --trace FILE Output an algorithmic trace to FILE.
486 --failed-alleles FILE
487 Write a BED file of the analyzed positions which do not
488 pass --pvar to FILE.
489 -@ --variant-input VCF
490 Use variants reported in VCF file as input to the algorithm.
491 A report will be generated for every record in the VCF file.
492 -l --only-use-input-alleles
493 Only provide variant calls and genotype likelihoods for sites
494 and alleles which are provided in the VCF input, and provide
495 output in the VCF for all input alleles, not just those which
496 have support in the data.
497
498 reporting:
499
500 -P --pvar N Report sites if the probability that there is a polymorphism
501 at the site is greater than N. default: 0.0001
502 -_ --show-reference-repeats
503 Calculate and show information about reference repeats in
504 the VCF output.
505
506 population model:
507
508 -T --theta N The expected mutation rate or pairwise nucleotide diversity
509 among the population under analysis. This serves as the
510 single parameter to the Ewens Sampling Formula prior model
511 default: 0.001
512 -p --ploidy N Sets the default ploidy for the analysis to N. default: 2
513 -J --pooled Assume that samples result from pooled sequencing.
514 When using this flag, set --ploidy to the number of
515 alleles in each sample.
516
517 reference allele:
518
519 -Z --use-reference-allele
520 This flag includes the reference allele in the analysis as
521 if it is another sample from the same population.
522 -H --diploid-reference
523 If using the reference sequence as a sample (-Z),
524 treat it as diploid. default: false (reference is haploid)
525 --reference-quality MQ,BQ
526 Assign mapping quality of MQ to the reference allele at each
527 site and base quality of BQ. default: 100,60
528
529 allele scope:
530
531 -I --no-snps Ignore SNP alleles.
532 -i --no-indels Ignore insertion and deletion alleles.
533 -X --no-mnps Ignore multi-nucleotide polymorphisms, MNPs.
534 -u --no-complex Ignore complex events (composites of other classes).
535 -n --use-best-n-alleles N
536 Evaluate only the best N SNP alleles, ranked by sum of
537 supporting quality scores. (Set to 0 to use all; default: all)
538 -E --max-complex-gap N
539 Allow complex alleles with contiguous embedded matches of up
540 to this length.
541
542 indel realignment:
543
544 -O --left-align-indels
545 Left-realign and merge gaps embedded in reads. default: false
546
547 input filters:
548
549 -4 --use-duplicate-reads
550 Include duplicate-marked alignments in the analysis.
551 default: exclude duplicates
552 -m --min-mapping-quality Q
553 Exclude alignments from analysis if they have a mapping
554 quality less than Q. default: 30
555 -q --min-base-quality Q
556 Exclude alleles from analysis if their supporting base
557 quality is less than Q. default: 20
558 -R --min-supporting-quality MQ,BQ
559 In order to consider an alternate allele, at least one supporting
560 alignment must have mapping quality MQ, and one supporting
561 allele must have base quality BQ. default: 0,0, unset
562 -Q --mismatch-base-quality-threshold Q
563 Count mismatches toward --read-mismatch-limit if the base
564 quality of the mismatch is &gt;= Q. default: 10
565 -U --read-mismatch-limit N
566 Exclude reads with more than N mismatches where each mismatch
567 has base quality &gt;= mismatch-base-quality-threshold.
568 default: ~unbounded
569 -z --read-max-mismatch-fraction N
570 Exclude reads with more than N [0,1] fraction of mismatches where
571 each mismatch has base quality &gt;= mismatch-base-quality-threshold
572 default: 1.0
573 -$ --read-snp-limit N
574 Exclude reads with more than N base mismatches, ignoring gaps
575 with quality &gt;= mismatch-base-quality-threshold.
576 default: ~unbounded
577 -e --read-indel-limit N
578 Exclude reads with more than N separate gaps.
579 default: ~unbounded
580 -0 --no-filters Do not use any input base and mapping quality filters
581 Equivalent to -m 0 -q 0 -R 0 -S 0
582 -x --indel-exclusion-window
583 Ignore portions of alignments this many bases from a
584 putative insertion or deletion allele. default: 0
585 -F --min-alternate-fraction N
586 Require at least this fraction of observations supporting
587 an alternate allele within a single individual
588 in order to evaluate the position. default: 0.0
589 -C --min-alternate-count N
590 Require at least this count of observations supporting
591 an alternate allele within a single individual in order
592 to evaluate the position. default: 1
593 -3 --min-alternate-qsum N
594 Require at least this sum of quality of observations supporting
595 an alternate allele within a single individual in order
596 to evaluate the position. default: 0
597 -G --min-alternate-total N
598 Require at least this count of observations supporting
599 an alternate allele within the total population in order
600 to use the allele in analysis. default: 1
601 -! --min-coverage N
602 Require at least this coverage to process a site. default: 0
603
604 bayesian priors:
605
606 -Y --no-ewens-priors
607 Turns off the Ewens' Sampling Formula component of the priors.
608 -k --no-population-priors
609 Equivalent to --pooled --no-ewens-priors
610 -w --hwe-priors Use the probability of the combination arising under HWE given
611 the allele frequency as estimated by observation frequency.
612
613 observation prior expectations:
614
615 -V --binomial-obs-priors
616 Incorporate expectations about observations into the priors,
617 Uses read placement probability, strand balance probability,
618 and read position (5'-3') probability.
619 -a --allele-balance-priors
620 Use aggregate probability of observation balance between alleles
621 as a component of the priors. Best for observations with minimal
622 inherent reference bias.
623
624 algorithmic features:
625
626 -M --site-selection-max-iterations N
627 Uses hill-climbing algorithm to search posterior space for N
628 iterations to determine if the site should be evaluated. Set to 0
629 to prevent use of this algorithm for site selection, and
630 to a low integer for improved site selection at a slight
631 performance penalty. default: 5.
632 -B --genotyping-max-iterations N
633 Iterate no more than N times during genotyping step. default: 25.
634 --genotyping-max-banddepth N
635 Integrate no deeper than the Nth best genotype by likelihood when
636 genotyping. default: 6.
637 -W --posterior-integration-limits N,M
638 Integrate all genotype combinations in our posterior space
639 which include no more than N samples with their Mth best
640 data likelihood. default: 1,3.
641 -K --no-permute
642 Do not scale prior probability of genotype combination given allele
643 frequency by the number of permutations of included genotypes.
644 -N --exclude-unobserved-genotypes
645 Skip sample genotypings for which the sample has no supporting reads.
646 -S --genotype-variant-threshold N
647 Limit posterior integration to samples where the second-best
648 genotype likelihood is no more than log(N) from the highest
649 genotype likelihood for the sample. default: ~unbounded
650 -j --use-mapping-quality
651 Use mapping quality of alleles when calculating data likelihoods.
652 -D --read-dependence-factor N
653 Incorporate non-independence of reads by scaling successive
654 observations by this factor during data likelihood
655 calculations. default: 0.9
656 -= --no-marginals
657 Do not calculate the marginal probability of genotypes. Saves
658 time and improves scaling performance in large populations.
659
660
661 ------
662
663 **Citation**
664
665 For the underlying tool, please cite `FreeBayes &lt;http://bioinformatics.bc.edu/marthlab/FreeBayes&gt;`_.
666
667 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.*
668
669 </help>
670 </tool>