Mercurial > repos > devteam > freebayes
annotate freebayes.xml @ 4:e54bb199a47b
Fixed git revision string in tool_dependencies.xml and added functional test data
author | greg <greg@bx.psu.edu> |
---|---|
date | Wed, 15 Aug 2012 10:43:46 -0400 |
parents | 838dd557c84c |
children | 949ac66308a7 |
rev | line source |
---|---|
0 | 1 <?xml version="1.0"?> |
2 <tool id="freebayes" name="FreeBayes" version="0.0.2"> | |
<requirements>
    <!-- External packages this tool declares as dependencies; the version
         strings are the identifiers used to resolve each package. -->
    <requirement version="0.9.4_a46483351fd0196637614121868fb5c386612b55" type="package">freebayes</requirement>
    <requirement version="0.1.18" type="package">samtools</requirement>
</requirements>
7 <description> - Bayesian genetic variant detector</description> | |
<command><![CDATA[
    ## Cheetah template that assembles the FreeBayes shell command line.
    ## The body is wrapped in CDATA so the shell operators used below
    ## (&&, ||, 2>&1, >&2) stay literal and the element remains well-formed XML.

    ##set up input files
    #set $reference_fasta_filename = "localref.fa"
    #if str( $reference_source.reference_source_selector ) == "history":
        ## History reference: link it into the working directory under a fixed
        ## name and run samtools faidx on it. A failure is reported on stderr;
        ## because the echo succeeds, the command chain still continues.
        ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
        samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for FreeBayes" >&2 &&
    #else:
        ## Cached reference: use the path field of the selected data table entry.
        #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
    #end if
    ## Link every BAM and its index under predictable local names so the
    ## .bam / .bam.bai pair sits side by side.
    #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
        ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" &&
        ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &&
    #end for
    ##finished setting up inputs

    ##start FreeBayes commandline
    freebayes
    #for $bam_count, $input_bam in enumerate( $reference_source.input_bams ):
        --bam "localbam_${bam_count}.bam"
    #end for
    --fasta-reference "${reference_fasta_filename}"

    ##outputs
    --vcf "${output_vcf}"

    ##advanced options
    ## Everything below is emitted only when the user picked "advanced";
    ## in "basic" mode FreeBayes runs with its own defaults.
    #if str( $options_type.options_type_selector ) == "advanced":
        ##additional outputs
        #if $options_type.output_trace_option:
            --trace "${output_trace}"
        #end if
        #if $options_type.output_failed_alleles_option:
            --failed-alleles "${output_failed_alleles_bed}"
        #end if

        ##additional inputs
        #if str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_target_file":
            --targets "${options_type.target_limit_type.input_target_bed}"
        #elif str( $options_type.target_limit_type.target_limit_type_selector ) == "limit_by_region":
            --region "${options_type.target_limit_type.region_chromosome}:${options_type.target_limit_type.region_start}..${options_type.target_limit_type.region_end}"
        #end if
        #if $options_type.input_sample_file:
            --samples "${options_type.input_sample_file}"
        #end if
        #if $options_type.input_populations_file:
            --populations "${options_type.input_populations_file}"
        #end if
        #if $options_type.input_cnv_map_bed:
            --cnv-map "${options_type.input_cnv_map_bed}"
        #end if
        #if str( $options_type.input_variant_type.input_variant_type_selector ) == "provide_vcf":
            --variant-input "${options_type.input_variant_type.input_variant_vcf}"
            ## Boolean params expand to their flag text (truevalue) or to nothing.
            ${options_type.input_variant_type.only_use_input_alleles}
        #end if

        ##reporting
        #if str( $options_type.section_reporting_type.section_reporting_type_selector ) == "set":
            --pvar "${options_type.section_reporting_type.pvar}"
            ${options_type.section_reporting_type.show_reference_repeats}
        #end if

        ##population model
        #if str( $options_type.section_population_model_type.section_population_model_type_selector ) == "set":
            --theta "${options_type.section_population_model_type.theta}"
            --ploidy "${options_type.section_population_model_type.ploidy}"
            ${options_type.section_population_model_type.pooled}
        #end if

        ##reference allele
        #if str( $options_type.use_reference_allele_type.use_reference_allele_type_selector ) == "include_reference_allele":
            --use-reference-allele
            ${options_type.use_reference_allele_type.diploid_reference}
            --reference-quality "${options_type.use_reference_allele_type.reference_quality_mq},${options_type.use_reference_allele_type.reference_quality_bq}"
        #end if

        ##allele scope
        #if str( $options_type.section_allele_scope_type.section_allele_scope_type_selector ) == "set":
            ${options_type.section_allele_scope_type.no_snps}
            ${options_type.section_allele_scope_type.no_indels}
            ${options_type.section_allele_scope_type.no_mnps}
            ${options_type.section_allele_scope_type.no_complex}
            --use-best-n-alleles "${options_type.section_allele_scope_type.use_best_n_alleles}"
            ## NOTE(review): this guard (and the same pattern on the other optional
            ## integer params below) may also skip an explicit 0 - confirm how an
            ## optional integer parameter evaluates in a Cheetah #if.
            #if $options_type.section_allele_scope_type.max_complex_gap:
                --max-complex-gap "${options_type.section_allele_scope_type.max_complex_gap}"
            #end if
        #end if

        ##indel realignment
        ${options_type.left_align_indels}

        ##input filters
        #if str( $options_type.section_input_filters_type.section_input_filters_type_selector ) == "set":
            ${options_type.section_input_filters_type.use_duplicate_reads}
            #if str( $options_type.section_input_filters_type.no_filter_type.no_filter_type_selector ) == "apply_filters":
                --min-mapping-quality "${options_type.section_input_filters_type.no_filter_type.min_mapping_quality}"
                --min-base-quality "${options_type.section_input_filters_type.no_filter_type.min_base_quality}"
                --min-supporting-quality "${options_type.section_input_filters_type.no_filter_type.min_supporting_quality_mq},${options_type.section_input_filters_type.no_filter_type.min_supporting_quality_bq}"
            #else:
                --no-filters
            #end if
            --mismatch-base-quality-threshold "${options_type.section_input_filters_type.mismatch_base_quality_threshold}"
            #if $options_type.section_input_filters_type.read_mismatch_limit:
                --read-mismatch-limit "${options_type.section_input_filters_type.read_mismatch_limit}"
            #end if
            --read-max-mismatch-fraction "${options_type.section_input_filters_type.read_max_mismatch_fraction}"
            #if $options_type.section_input_filters_type.read_snp_limit:
                --read-snp-limit "${options_type.section_input_filters_type.read_snp_limit}"
            #end if
            #if $options_type.section_input_filters_type.read_indel_limit:
                --read-indel-limit "${options_type.section_input_filters_type.read_indel_limit}"
            #end if
            --indel-exclusion-window "${options_type.section_input_filters_type.indel_exclusion_window}"
            --min-alternate-fraction "${options_type.section_input_filters_type.min_alternate_fraction}"
            --min-alternate-count "${options_type.section_input_filters_type.min_alternate_count}"
            --min-alternate-qsum "${options_type.section_input_filters_type.min_alternate_qsum}"
            --min-alternate-total "${options_type.section_input_filters_type.min_alternate_total}"
            --min-coverage "${options_type.section_input_filters_type.min_coverage}"
        #end if

        ##bayesian priors
        #if str( $options_type.section_bayesian_priors_type.section_bayesian_priors_type_selector ) == "set":
            ${options_type.section_bayesian_priors_type.no_ewens_priors}
            ${options_type.section_bayesian_priors_type.no_population_priors}
            ${options_type.section_bayesian_priors_type.hwe_priors}
        #end if

        ##observation prior expectations
        #if str( $options_type.section_observation_prior_expectations_type.section_observation_prior_expectations_type_selector ) == "set":
            ${options_type.section_observation_prior_expectations_type.binomial_obs_priors}
            ${options_type.section_observation_prior_expectations_type.allele_balance_priors}
        #end if

        ##algorithmic features
        #if str( $options_type.section_algorithmic_features_type.section_algorithmic_features_type_selector ) == "set":
            --site-selection-max-iterations "${options_type.section_algorithmic_features_type.site_selection_max_iterations}"
            --genotyping-max-iterations "${options_type.section_algorithmic_features_type.genotyping_max_iterations}"
            --genotyping-max-banddepth "${options_type.section_algorithmic_features_type.genotyping_max_banddepth}"
            --posterior-integration-limits "${options_type.section_algorithmic_features_type.posterior_integration_limits_n},${options_type.section_algorithmic_features_type.posterior_integration_limits_m}"
            ${options_type.section_algorithmic_features_type.no_permute}
            ${options_type.section_algorithmic_features_type.exclude_unobserved_genotypes}
            #if $options_type.section_algorithmic_features_type.genotype_variant_threshold:
                --genotype-variant-threshold "${options_type.section_algorithmic_features_type.genotype_variant_threshold}"
            #end if
            ${options_type.section_algorithmic_features_type.use_mapping_quality}
            --read-dependence-factor "${options_type.section_algorithmic_features_type.read_dependence_factor}"
            ${options_type.section_algorithmic_features_type.no_marginals}
        #end if

    #end if
]]></command>
158 <inputs> | |
<conditional name="reference_source">
    <!-- Chooses where the reference comes from. Both branches define the same
         two inputs (the input_bams repeat and ref_file), so the command
         template can read them without knowing which branch is active. -->
    <param name="reference_source_selector"
           type="select"
           label="Choose the source for the reference list">
        <option value="cached">Locally cached</option>
        <option value="history">History</option>
    </param>

    <!-- Reference picked from the sam_fa_indexes data table. -->
    <when value="cached">
        <repeat name="input_bams" min="1" title="Sample BAM file">
            <param name="input_bam" format="bam" type="data" label="BAM file">
                <validator type="unspecified_build" />
                <validator type="dataset_metadata_in_data_table"
                           table_name="sam_fa_indexes"
                           metadata_name="dbkey"
                           metadata_column="value"
                           message="Sequences are not currently available for the specified build." />
            </param>
        </repeat>
        <param name="ref_file" type="select" label="Using reference genome">
            <options from_data_table="sam_fa_indexes">
                <!-- <filter type="sam_fa_indexes" key="dbkey" ref="input_bam" column="value"/> does not yet work in a repeat...-->
            </options>
            <validator type="no_options"
                       message="A built-in reference genome is not available for the build associated with the selected input file"/>
        </param>
    </when>

    <!-- Reference supplied as a FASTA dataset from the history. -->
    <when value="history"> <!-- FIX ME!!!! -->
        <repeat name="input_bams" min="1" title="Sample BAM file">
            <param name="input_bam" format="bam" type="data" label="BAM file" />
        </repeat>
        <param name="ref_file" format="fasta" type="data" label="Using reference file" />
    </when>
</conditional>
185 | |
<conditional name="options_type">
    <!-- Top-level switch between "basic" (no extra parameters) and "advanced".
         The advanced branch groups the FreeBayes options into sections; most
         sections are themselves conditionals with a do_not_set / set selector.
         Boolean parameters carry their command-line flag in truevalue and an
         empty falsevalue. -->
    <param name="options_type_selector" type="select" label="Basic or Advanced options">
        <option value="basic" selected="True">Basic</option>
        <option value="advanced">Advanced</option>
    </param>
    <when value="basic">
        <!-- Do nothing here -->
    </when>
    <when value="advanced">

        <!-- output -->
        <!-- These two switches also gate the optional output datasets through the filters in the outputs section. -->
        <param name="output_failed_alleles_option" type="boolean" truevalue="--failed-alleles" falsevalue="" checked="False" label="Write out failed alleles file" />
        <param name="output_trace_option" type="boolean" truevalue="--trace" falsevalue="" checked="False" label="Write out algorithm trace file" />


        <!-- input -->
        <conditional name="target_limit_type">
            <param name="target_limit_type_selector" type="select" label="Limit analysis to listed targets">
                <option value="do_not_limit" selected="True">Do not limit</option>
                <option value="limit_by_target_file">Limit by target file</option>
                <option value="limit_by_region">Limit to region</option>
            </param>
            <when value="do_not_limit">
                <!-- Do nothing here -->
            </when>
            <when value="limit_by_target_file">
                <param name="input_target_bed" type="data" format="bed" label="Limit analysis to targets listed in the BED-format FILE." />
            </when>
            <when value="limit_by_region">
                <!-- The three fields are joined as chromosome:start..end by the command template. -->
                <param name="region_chromosome" type="text" label="Region Chromosome" value="" /> <!--only once? -->
                <param name="region_start" type="integer" label="Region Start" value="" />
                <param name="region_end" type="integer" label="Region End" value="" />
            </when>
        </conditional>
        <param name="input_sample_file" type="data" format="txt" label="Limit analysis to samples listed (one per line) in the FILE" optional="True" />
        <param name="input_populations_file" type="data" format="txt" label="Populations File" optional="True" />
        <param name="input_cnv_map_bed" type="data" format="bed" label="Read a copy number map from the BED file FILE" optional="True" />
        <conditional name="input_variant_type">
            <param name="input_variant_type_selector" type="select" label="Provide variants file">
                <option value="do_not_provide" selected="True">Do not provide</option>
                <option value="provide_vcf">Provide VCF file</option>
            </param>
            <when value="do_not_provide">
                <!-- Do nothing here -->
            </when>
            <when value="provide_vcf">
                <param name="input_variant_vcf" type="data" format="vcf" label="Use variants reported in VCF file as input to the algorithm" />
                <param name="only_use_input_alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="False" label="Only provide variant calls and genotype likelihoods for sites in VCF" />
            </when>
        </conditional>


        <!-- reporting -->
        <conditional name="section_reporting_type">
            <param name="section_reporting_type_selector" type="select" label="Set Reporting options">
                <option value="do_not_set" selected="True">Do not set</option>
                <option value="set">Set</option>
            </param>
            <when value="do_not_set">
                <!-- do nothing here -->
            </when>
            <when value="set">
                <param name="pvar" type="float" label="Report sites if the probability that there is a polymorphism at the site is greater" value="0.0001" />
                <param name="show_reference_repeats" type="boolean" truevalue="--show-reference-repeats" falsevalue="" checked="False" label="Calculate and show information about reference repeats" />
            </when>
        </conditional>


        <!-- population model -->
        <conditional name="section_population_model_type">
            <param name="section_population_model_type_selector" type="select" label="Set population model options">
                <option value="do_not_set" selected="True">Do not set</option>
                <option value="set">Set</option>
            </param>
            <when value="do_not_set">
                <!-- do nothing here -->
            </when>
            <when value="set">
                <param name="theta" type="float" label="expected mutation rate or pairwise nucleotide diversity among the population" value="0.001" help="This serves as the single parameter to the Ewens Sampling Formula prior model"/>
                <param name="ploidy" type="integer" label="default ploidy for the analysis" value="2" />
                <param name="pooled" type="boolean" truevalue="--pooled" falsevalue="" checked="False" label="Assume that samples result from pooled sequencing" help="When using this flag, set --ploidy to the number of alleles in each sample." />
            </when>
        </conditional>

        <!-- reference allele -->
        <conditional name="use_reference_allele_type">
            <param name="use_reference_allele_type_selector" type="select" label="Include the reference allele in the analysis">
                <option value="do_not_include_reference_allele" selected="True">Do not include</option>
                <option value="include_reference_allele">Include</option>
            </param>
            <when value="do_not_include_reference_allele">
                <!-- Do nothing here -->
            </when>
            <when value="include_reference_allele">
                <param name="diploid_reference" type="boolean" truevalue="--diploid-reference" falsevalue="" checked="False" label="Treat reference as diploid" />
                <!-- The two qualities are passed together as a single MQ,BQ pair. -->
                <param name="reference_quality_mq" type="integer" label="Assign mapping quality" value="100" />
                <param name="reference_quality_bq" type="integer" label="Assign base quality" value="60" />
            </when>
        </conditional>

        <!-- allele scope -->
        <conditional name="section_allele_scope_type">
            <param name="section_allele_scope_type_selector" type="select" label="Set allele scope options">
                <option value="do_not_set" selected="True">Do not set</option>
                <option value="set">Set</option>
            </param>
            <when value="do_not_set">
                <!-- do nothing here -->
            </when>
            <when value="set">
                <param name="no_snps" type="boolean" truevalue="--no-snps" falsevalue="" checked="False" label="Ignore SNP alleles" />
                <param name="no_indels" type="boolean" truevalue="--no-indels" falsevalue="" checked="False" label="Ignore insertion and deletion alleles" />
                <param name="no_mnps" type="boolean" truevalue="--no-mnps" falsevalue="" checked="False" label="Ignore multi-nucleotide polymorphisms, MNPs" />
                <param name="no_complex" type="boolean" truevalue="--no-complex" falsevalue="" checked="False" label="Ignore complex events (composites of other classes)" />
                <param name="use_best_n_alleles" type="integer" label="Evaluate only the best N SNP alleles" value="0" min="0" help="Ranked by sum of supporting quality scores; Set to 0 to use all" />
                <param name="max_complex_gap" type="integer" label="Allow complex alleles with contiguous embedded matches of up to this length" value="" optional="True"/>
            </when>
        </conditional>

        <!-- indel realignment -->
        <param name="left_align_indels" type="boolean" truevalue="--left-align-indels" falsevalue="" checked="False" label="Left-realign and merge gaps embedded in reads" />

        <!-- input filters -->
        <conditional name="section_input_filters_type">
            <param name="section_input_filters_type_selector" type="select" label="Set input filters options">
                <option value="do_not_set" selected="True">Do not set</option>
                <option value="set">Set</option>
            </param>
            <when value="do_not_set">
                <!-- do nothing here -->
            </when>
            <when value="set">
                <param name="use_duplicate_reads" type="boolean" truevalue="--use-duplicate-reads" falsevalue="" checked="False" label="Include duplicate-marked alignments in the analysis" />
                <conditional name="no_filter_type">
                    <param name="no_filter_type_selector" type="select" label="Apply filters">
                        <option value="apply_filters" selected="True">Apply</option>
                        <option value="no_filters">Do not apply</option>
                    </param>
                    <when value="no_filters">
                        <!-- Do nothing here --> <!-- no-filters -->
                    </when>
                    <when value="apply_filters">
                        <param name="min_mapping_quality" type="integer" label="Exclude alignments from analysis if they have a mapping quality less than" value="30" />
                        <param name="min_base_quality" type="integer" label="Exclude alleles from analysis if their supporting base quality less than" value="20" />
                        <param name="min_supporting_quality_mq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have mapping quality" value="0" />
                        <param name="min_supporting_quality_bq" type="integer" label="In order to consider an alternate allele, at least one supporting alignment must have base quality" value="0" />
                    </when>
                </conditional>
                <param name="mismatch_base_quality_threshold" type="integer" label="Count mismatches toward read-mismatch-limit if the base quality of the mismatch is >=" value="10" />
                <param name="read_mismatch_limit" type="integer" label="Exclude reads with more than N mismatches where each mismatch has base quality >= mismatch-base-quality-threshold" value="" optional="True" />
                <param name="read_max_mismatch_fraction" type="float" label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality >= mismatch-base-quality-threshold" value="1.0" />
                <param name="read_snp_limit" type="integer" label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= mismatch-base-quality-threshold" value="" optional="True" />
                <param name="read_indel_limit" type="integer" label="Exclude reads with more than N separate gaps" value="" optional="True" />
                <param name="indel_exclusion_window" type="integer" label="Ignore portions of alignments this many bases from a putative insertion or deletion allele" value="0" />
                <param name="min_alternate_fraction" type="float" label="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" />
                <param name="min_alternate_count" type="integer" label="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position" value="1" />
                <param name="min_alternate_qsum" type="integer" label="Require at least this sum of quality of observations supporting an alternate allele within a single individual in order to evaluate the position" value="0" />
                <param name="min_alternate_total" type="integer" label="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis" value="1" />
                <param name="min_coverage" type="integer" label="Require at least this coverage to process a site" value="0" />
            </when>
        </conditional>


        <!-- bayesian priors -->
        <conditional name="section_bayesian_priors_type">
            <param name="section_bayesian_priors_type_selector" type="select" label="Set bayesian priors options">
                <option value="do_not_set" selected="True">Do not set</option>
                <option value="set">Set</option>
            </param>
            <when value="do_not_set">
                <!-- do nothing here -->
            </when>
            <when value="set">
                <param name="no_ewens_priors" type="boolean" truevalue="--no-ewens-priors" falsevalue="" checked="False" label="Turns off the Ewens' Sampling Formula component of the priors" />
                <param name="no_population_priors" type="boolean" truevalue="--no-population-priors" falsevalue="" checked="False" label="No population priors" help="Equivalent to --pooled --no-ewens-priors" />
                <param name="hwe_priors" type="boolean" truevalue="--hwe-priors" falsevalue="" checked="False" label="Use the probability of the combination arising under HWE given the allele frequency as estimated by observation frequency" />
            </when>
        </conditional>

        <!-- observation prior expectations -->
        <conditional name="section_observation_prior_expectations_type">
            <param name="section_observation_prior_expectations_type_selector" type="select" label="Set observation prior expectations options">
                <option value="do_not_set" selected="True">Do not set</option>
                <option value="set">Set</option>
            </param>
            <when value="do_not_set">
                <!-- do nothing here -->
            </when>
            <when value="set">
                <param name="binomial_obs_priors" type="boolean" truevalue="--binomial-obs-priors" falsevalue="" checked="False" label="Incorporate expectations about observations into the priors. Uses read placement probability, strand balance probability, and read position (5'-3') probability" />
                <param name="allele_balance_priors" type="boolean" truevalue="--allele-balance-priors" falsevalue="" checked="False" label="Use aggregate probability of observation balance between alleles as a component of the priors. Best for observations with minimal inherent reference bias" />
            </when>
        </conditional>


        <!-- algorithmic features -->
        <conditional name="section_algorithmic_features_type">
            <param name="section_algorithmic_features_type_selector" type="select" label="Set algorithmic features options">
                <option value="do_not_set" selected="True">Do not set</option>
                <option value="set">Set</option>
            </param>
            <when value="do_not_set">
                <!-- do nothing here -->
            </when>
            <when value="set">
                <param name="site_selection_max_iterations" type="integer" label="Uses hill-climbing algorithm to search posterior space for N iterations to determine if the site should be evaluated." value="5" help="Set to 0 to prevent use of this algorithm for site selection, and to a low integer for improved site selection at a slight performance penalty" />
                <param name="genotyping_max_iterations" type="integer" label="Iterate no more than N times during genotyping step" value="25" />
                <param name="genotyping_max_banddepth" type="integer" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" value="6" />
                <!-- N and M are passed together as a single N,M pair. -->
                <param name="posterior_integration_limits_n" type="integer" label="Posterior integration limit N" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="1" />
                <param name="posterior_integration_limits_m" type="integer" label="Posterior integration limit M" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood." value="3" />
                <param name="no_permute" type="boolean" truevalue="--no-permute" falsevalue="" checked="False" label="Do not scale prior probability of genotype combination given allele frequency by the number of permutations of included genotypes" />
                <param name="exclude_unobserved_genotypes" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="False" label="Skip sample genotypings for which the sample has no supporting reads" />
                <param name="genotype_variant_threshold" type="integer" label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample" value="" optional="True" />
                <param name="use_mapping_quality" type="boolean" truevalue="--use-mapping-quality" falsevalue="" checked="False" label="Use mapping quality of alleles when calculating data likelihoods" />
                <param name="read_dependence_factor" type="float" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" value="0.9" />
                <param name="no_marginals" type="boolean" truevalue="--no-marginals" falsevalue="" checked="False" label="Do not calculate the marginal probability of genotypes. Saves time and improves scaling performance in large populations" />
            </when>
        </conditional>


    </when>
</conditional>
408 | |
409 </inputs> | |
<outputs>
    <!-- The VCF of variant calls is always produced. -->
    <data name="output_vcf"
          format="vcf"
          label="${tool.name} on ${on_string} (variants)" />

    <!-- The two datasets below exist only in advanced mode and only when the
         matching boolean switch is checked. -->
    <data name="output_failed_alleles_bed"
          format="bed"
          label="${tool.name} on ${on_string} (failed alleles)">
        <filter>options_type['options_type_selector'] == "advanced" and options_type['output_failed_alleles_option'] is True</filter>
    </data>
    <data name="output_trace"
          format="txt"
          label="${tool.name} on ${on_string} (trace)">
        <filter>options_type['options_type_selector'] == "advanced" and options_type['output_trace_option'] is True</filter>
    </data>
</outputs>
<tests>
    <!-- Single functional test: history-supplied phiX reference, one BAM,
         basic options; the VCF is checked with a "contains" comparison. -->
    <test>
        <param value="history" name="reference_source_selector" />
        <param value="phiX.fasta" name="ref_file" ftype="fasta"/>
        <param value="fake_phiX_reads_1.bam" name="input_bam" ftype="bam"/>
        <param value="basic" name="options_type_selector"/>
        <output name="output_vcf" compare="contains" file="freebayes_out_1.vcf.contains"/>
        <!-- <output name="output_failed_alleles_bed" file="empty_file.dat" />
        <output name="output_trace" file="freebayes_out_1.output_trace" /> -->
    </test>
</tests>
430 <help> | |
431 **What it does** | |
432 | |
433 This tool uses FreeBayes to call SNPs given a reference sequence and a BAM alignment file. | |
434 | |
435 FreeBayes is a high-performance, flexible, and open-source Bayesian genetic variant detector. It operates on BAM alignment files, which are produced by most contemporary short-read aligners. | |
436 | |
437 In addition to substantial performance improvements over its predecessors (PolyBayes, GigaBayes, and BamBayes), it expands the scope of SNP and small-indel variant calling to populations of individuals with heterogeneous copy number. FreeBayes is currently under active development. | |
438 | |
439 Go `here <http://bioinformatics.bc.edu/marthlab/FreeBayes>`_ for details on FreeBayes. | |
440 | |
441 ------ | |
442 | |
443 **Inputs** | |
444 | |
445 FreeBayes accepts an input aligned BAM file. | |
446 | |
447 | |
448 **Outputs** | |
449 | |
450 The output is in the VCF format. | |
451 | |
452 ------- | |
453 | |
454 **Settings**:: | |
455 | |
456 input and output: | |
457 | |
458 -b --bam FILE Add FILE to the set of BAM files to be analyzed. | |
459 -c --stdin Read BAM input on stdin. | |
460 -v --vcf FILE Output VCF-format results to FILE. | |
461 -f --fasta-reference FILE | |
462 Use FILE as the reference sequence for analysis. | |
463 An index file (FILE.fai) will be created if none exists. | |
464 If neither --targets nor --region are specified, FreeBayes | |
465 will analyze every position in this reference. | |
466 -t --targets FILE | |
467 Limit analysis to targets listed in the BED-format FILE. | |
468 -r --region <chrom>:<start_position>..<end_position> | |
469 Limit analysis to the specified region, 0-base coordinates, | |
470 end_position not included (same as BED format). | |
471 -s --samples FILE | |
472 Limit analysis to samples listed (one per line) in the FILE. | |
473 By default FreeBayes will analyze all samples in its input | |
474 BAM files. | |
475 --populations FILE | |
476 Each line of FILE should list a sample and a population which | |
477 it is part of. The population-based bayesian inference model | |
478 will then be partitioned on the basis of the populations. | |
479 -A --cnv-map FILE | |
480 Read a copy number map from the BED file FILE, which has | |
481 the format: | |
482 reference sequence, start, end, sample name, copy number | |
483 ... for each region in each sample which does not have the | |
484 default copy number as set by --ploidy. | |
485 -L --trace FILE Output an algorithmic trace to FILE. | |
486 --failed-alleles FILE | |
487 Write a BED file of the analyzed positions which do not | |
488 pass --pvar to FILE. | |
489 -@ --variant-input VCF | |
490 Use variants reported in VCF file as input to the algorithm. | |
491 A report will be generated for every record in the VCF file. | |
492 -l --only-use-input-alleles | |
493 Only provide variant calls and genotype likelihoods for sites | |
494 and alleles which are provided in the VCF input, and provide | |
495 output in the VCF for all input alleles, not just those which | |
496 have support in the data. | |
497 | |
498 reporting: | |
499 | |
500 -P --pvar N Report sites if the probability that there is a polymorphism | |
501 at the site is greater than N. default: 0.0001 | |
502 -_ --show-reference-repeats | |
503 Calculate and show information about reference repeats in | |
504 the VCF output. | |
505 | |
506 population model: | |
507 | |
508 -T --theta N The expected mutation rate or pairwise nucleotide diversity | |
509 among the population under analysis. This serves as the | |
510 single parameter to the Ewens Sampling Formula prior model | |
511 default: 0.001 | |
512 -p --ploidy N Sets the default ploidy for the analysis to N. default: 2 | |
513 -J --pooled Assume that samples result from pooled sequencing. | |
514 When using this flag, set --ploidy to the number of | |
515 alleles in each sample. | |
516 | |
517 reference allele: | |
518 | |
519 -Z --use-reference-allele | |
520 This flag includes the reference allele in the analysis as | |
521 if it is another sample from the same population. | |
522 -H --diploid-reference | |
523 If using the reference sequence as a sample (-Z), | |
524 treat it as diploid. default: false (reference is haploid) | |
525 --reference-quality MQ,BQ | |
526 Assign mapping quality of MQ to the reference allele at each | |
527 site and base quality of BQ. default: 100,60 | |
528 | |
529 allele scope: | |
530 | |
531 -I --no-snps Ignore SNP alleles. | |
532 -i --no-indels Ignore insertion and deletion alleles. | |
533 -X --no-mnps Ignore multi-nucleotide polymorphisms, MNPs. | |
534 -u --no-complex Ignore complex events (composites of other classes). | |
535 -n --use-best-n-alleles N | |
536 Evaluate only the best N SNP alleles, ranked by sum of | |
537 supporting quality scores. (Set to 0 to use all; default: all) | |
538 -E --max-complex-gap N | |
539 Allow complex alleles with contiguous embedded matches of up | |
540 to this length. | |
541 | |
542 indel realignment: | |
543 | |
544 -O --left-align-indels | |
545 Left-realign and merge gaps embedded in reads. default: false | |
546 | |
547 input filters: | |
548 | |
549 -4 --use-duplicate-reads | |
550 Include duplicate-marked alignments in the analysis. | |
551 default: exclude duplicates | |
552 -m --min-mapping-quality Q | |
553 Exclude alignments from analysis if they have a mapping | |
554 quality less than Q. default: 30 | |
555 -q --min-base-quality Q | |
556 Exclude alleles from analysis if their supporting base | |
557 quality is less than Q. default: 20 | |
558 -R --min-supporting-quality MQ,BQ | |
559 In order to consider an alternate allele, at least one supporting | |
560 alignment must have mapping quality MQ, and one supporting | |
561 allele must have base quality BQ. default: 0,0, unset | |
562 -Q --mismatch-base-quality-threshold Q | |
563 Count mismatches toward --read-mismatch-limit if the base | |
564 quality of the mismatch is >= Q. default: 10 | |
565 -U --read-mismatch-limit N | |
566 Exclude reads with more than N mismatches where each mismatch | |
567 has base quality >= mismatch-base-quality-threshold. | |
568 default: ~unbounded | |
569 -z --read-max-mismatch-fraction N | |
570 Exclude reads with more than N [0,1] fraction of mismatches where | |
571 each mismatch has base quality >= mismatch-base-quality-threshold | |
572 default: 1.0 | |
573 -$ --read-snp-limit N | |
574 Exclude reads with more than N base mismatches, ignoring gaps | |
575 with quality >= mismatch-base-quality-threshold. | |
576 default: ~unbounded | |
577 -e --read-indel-limit N | |
578 Exclude reads with more than N separate gaps. | |
579 default: ~unbounded | |
580 -0 --no-filters Do not use any input base and mapping quality filters | |
581 Equivalent to -m 0 -q 0 -R 0 -S 0 | |
582 -x --indel-exclusion-window | |
583 Ignore portions of alignments this many bases from a | |
584 putative insertion or deletion allele. default: 0 | |
585 -F --min-alternate-fraction N | |
586 Require at least this fraction of observations supporting | |
587 an alternate allele within a single individual | |
588 in order to evaluate the position. default: 0.0 | |
589 -C --min-alternate-count N | |
590 Require at least this count of observations supporting | |
591 an alternate allele within a single individual in order | |
592 to evaluate the position. default: 1 | |
593 -3 --min-alternate-qsum N | |
594 Require at least this sum of quality of observations supporting | |
595 an alternate allele within a single individual in order | |
596 to evaluate the position. default: 0 | |
597 -G --min-alternate-total N | |
598 Require at least this count of observations supporting | |
599 an alternate allele within the total population in order | |
600 to use the allele in analysis. default: 1 | |
601 -! --min-coverage N | |
602 Require at least this coverage to process a site. default: 0 | |
603 | |
604 bayesian priors: | |
605 | |
606 -Y --no-ewens-priors | |
607 Turns off the Ewens' Sampling Formula component of the priors. | |
608 -k --no-population-priors | |
609 Equivalent to --pooled --no-ewens-priors | |
610 -w --hwe-priors Use the probability of the combination arising under HWE given | |
611 the allele frequency as estimated by observation frequency. | |
612 | |
613 observation prior expectations: | |
614 | |
615 -V --binomial-obs-priors | |
616 Incorporate expectations about observations into the priors. | |
617 Uses read placement probability, strand balance probability, | |
618 and read position (5'-3') probability. | |
619 -a --allele-balance-priors | |
620 Use aggregate probability of observation balance between alleles | |
621 as a component of the priors. Best for observations with minimal | |
622 inherent reference bias. | |
623 | |
624 algorithmic features: | |
625 | |
626 -M --site-selection-max-iterations N | |
627 Uses hill-climbing algorithm to search posterior space for N | |
628 iterations to determine if the site should be evaluated. Set to 0 | |
629 to prevent use of this algorithm for site selection, and | |
630 to a low integer for improved site selection at a slight | |
631 performance penalty. default: 5. | |
632 -B --genotyping-max-iterations N | |
633 Iterate no more than N times during genotyping step. default: 25. | |
634 --genotyping-max-banddepth N | |
635 Integrate no deeper than the Nth best genotype by likelihood when | |
636 genotyping. default: 6. | |
637 -W --posterior-integration-limits N,M | |
638 Integrate all genotype combinations in our posterior space | |
639 which include no more than N samples with their Mth best | |
640 data likelihood. default: 1,3. | |
641 -K --no-permute | |
642 Do not scale prior probability of genotype combination given allele | |
643 frequency by the number of permutations of included genotypes. | |
644 -N --exclude-unobserved-genotypes | |
645 Skip sample genotypings for which the sample has no supporting reads. | |
646 -S --genotype-variant-threshold N | |
647 Limit posterior integration to samples where the second-best | |
648 genotype likelihood is no more than log(N) from the highest | |
649 genotype likelihood for the sample. default: ~unbounded | |
650 -j --use-mapping-quality | |
651 Use mapping quality of alleles when calculating data likelihoods. | |
652 -D --read-dependence-factor N | |
653 Incorporate non-independence of reads by scaling successive | |
654 observations by this factor during data likelihood | |
655 calculations. default: 0.9 | |
656 -= --no-marginals | |
657 Do not calculate the marginal probability of genotypes. Saves | |
658 time and improves scaling performance in large populations. | |
659 | |
660 | |
661 ------ | |
662 | |
663 **Citation** | |
664 | |
665 For the underlying tool, please cite `FreeBayes <http://bioinformatics.bc.edu/marthlab/FreeBayes>`_. | |
666 | |
667 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* | |
668 | |
669 </help> | |
670 </tool> |