Mercurial > repos > devteam > freebayes
diff freebayes.xml @ 40:0d8581534009 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/freebayes commit 48700fed491e8056cddd1ee7d8bc9ca08e19fe8d
| author | iuc |
|---|---|
| date | Tue, 05 Aug 2025 13:47:58 +0000 |
| parents | 85dc96ebc770 |
| children | |
line wrap: on
line diff
--- a/freebayes.xml Tue Jun 10 07:59:51 2025 +0000 +++ b/freebayes.xml Tue Aug 05 13:47:58 2025 +0000 @@ -1,14 +1,15 @@ -<tool id="freebayes" name="FreeBayes" version="@TOOL_VERSION@+galaxy0"> +<tool id="freebayes" name="FreeBayes" version="@TOOL_VERSION@+galaxy1" profile="23.0"> <description>bayesian genetic variant detector</description> + <macros> + <import>macros.xml</import> + </macros> <xrefs> <xref type="bio.tools">freebayes</xref> </xrefs> - <macros> - <import>macros.xml</import> - </macros> <expand macro="requirements"> + <requirement type="package" version="3.11">grep</requirement> <requirement type="package" version="5.3.1">gawk</requirement> - <requirement type="package" version="20250422">parallel</requirement> + <requirement type="package" version="20250622">parallel</requirement> </expand> <expand macro="version_command" /> <command detect_errors="exit_code"><![CDATA[ @@ -239,6 +240,18 @@ #end if #end if + #if str($output_options.flavor) == "gvcf": + --gvcf + #elif str($output_options.flavor) == "gvcf_custom": + --gvcf + #if $output_options.gvcf_block_size == 0: + ## special-casing the 0 case for performance + --gvcf-dont-use-chunk t + #else: + --gvcf-chunk $output_options.gvcf_block_size + #end if + #end if + "; done > freebayes_commands.sh && @@ -366,21 +379,21 @@ <when value="provide_vcf"> <param name="input_variant_vcf" argument="--variant-input" type="data" format="vcf_bgzip" label="Use variants reported in this VCF dataset as input to the algorithm"/> - <param name="only_use_input_alleles" argument="--only-use-input-alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="false" + <param argument="--only-use-input-alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="false" label="Only provide variant calls and genotype likelihoods for sites in VCF" /> </when> </conditional> - <param name="haplotype_basis_alleles" argument="--haplotype-basis-alleles" type="data" format="vcf" 
optional="true" + <param argument="--haplotype-basis-alleles" type="data" format="vcf" optional="true" label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" /> - <param name="report_monomorphic" argument="--report-monomorphic" type="boolean" truevalue="--report-monomorphic" falsevalue="" checked="false" + <param argument="--report-monomorphic" type="boolean" truevalue="--report-monomorphic" falsevalue="" checked="false" label="Report even loci which appear to be monomorphic, and report all considered alleles, even those which are not in called genotypes" /> - <param name="observation_bias" argument="--observation-bias" type="data" format="tabular" optional="true" + <param argument="--observation-bias" type="data" format="tabular" optional="true" label="Load read length-dependent allele observation biases from" help="The format is [length] [alignment efficiency relative to reference] where the efficiency is 1 if there is no relative observation bias" /> - <param name="contamination_estimates" argument="--contamination-estimates" type="data" format="tabular" optional="true" + <param argument="--contamination-estimates" type="data" format="tabular" optional="true" label="Upload per-sample estimates of contamination from" help="The format should be: sample p(read=R|genotype=AR) p(read=A|genotype=AA) Sample '*' can be used to set default contamination estimates" /> - <param name="trim_complex_tail" argument="--trim-complex-tail" type="boolean" truevalue="--trim-complex-tail" falsevalue="" + <param argument="--trim-complex-tail" type="boolean" truevalue="--trim-complex-tail" falsevalue="" label="Trim trailing reference matches" /> </when> <when value="do_not_set" /> @@ -432,7 +445,7 @@ <when value="set"> <param name="Z" argument="--use-reference-allele" type="boolean" truevalue="-Z" falsevalue="" checked="false" label="Include the reference allele in the analysis as if it is another sample from the same 
population" /> - <param name="reference_quality" argument="--reference-quality" type="text" value="100,60" + <param argument="--reference-quality" type="text" value="100,60" label="Assign mapping quality of MQ (100) to the reference allele at each site and base quality of BQ (60)" /> </when> <when value="do_not_set" /> @@ -457,13 +470,13 @@ <param name="n" argument="--use-best-n-alleles" type="integer" value="0" label="How many best SNP alleles to evaluate" help="Alleles are ranked by the sum of supporting quality scores. Set to 0 to evaluate all" /> - <param name="haplotype_length" argument="--haplotype-length" type="integer" value="3" + <param argument="--haplotype-length" type="integer" value="3" label="Allow haplotype calls with contiguous embedded matches of up to (nucleotides)" /> <param name="min_repeat_length" argument="--min-repeat-size" type="integer" value="5" label="When assembling observations across repeats, require the total repeat length at least this many bp" /> - <param name="min_repeat_entropy" argument="--min-repeat-entropy" type="integer" value="1" + <param argument="--min-repeat-entropy" type="integer" value="1" label="To detect interrupted repeats, build across sequence until it has entropy > (bits per bp)" /> - <param name="no_partial_observations" argument="--no-partial-observations" type="boolean" truevalue="--no-partial-observations" falsevalue="" checked="false" + <param argument="--no-partial-observations" type="boolean" truevalue="--no-partial-observations" falsevalue="" checked="false" label="Exclude observations which do not fully span the dynamically-determined detection window" help="By default, FreeBayes uses all observations, dividing partial support across matching haplotypes when generating haplotypes" /> </when> @@ -508,7 +521,7 @@ help="default=~unbounded" /> <param argument="--read-max-mismatch-fraction" type="float" value="1.0" min="0.0" max="1.0" label="Exclude reads with more than N [0,1] fraction of mismatches where each 
mismatch has base quality >= mismatch-base-quality-threshold (second option above)" /> - <param name="read_snp_limit" argument="--read-snp-limit" type="integer" value="1000" + <param argument="--read-snp-limit" type="integer" value="1000" label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= mismatch-base-quality-threshold (third option above)" help="default=~unbounded" /> </when> @@ -559,12 +572,12 @@ <option value="set">Set genotype likelihood options</option> </param> <when value="set"> - <param name="base_quality_cap" argument="--base-quality-cap" type="integer" value="0" + <param argument="--base-quality-cap" type="integer" value="0" label="Limit estimated observation quality by capping base quality at" /> - <param name="experimental_gls" argument="--experimental-gls" type="boolean" truevalue="--experimental-gls" falsevalue="" checked="false" + <param argument="--experimental-gls" type="boolean" truevalue="--experimental-gls" falsevalue="" checked="false" label="Generate genotype likelihoods using 'effective base depth' metric qual = 1-BaseQual * 1-MapQual" help="Incorporate partial observations. This is the default when contamination estimates are provided. 
Optimized for diploid samples" /> - <param name="prob_contamination" argument="--prob-contamination" type="float" value="10e-9" + <param argument="--prob-contamination" type="float" value="10e-9" label="An estimate of contamination to use for all samples" /> </when> <when value="do_not_set" /> @@ -578,17 +591,17 @@ <option value="set">Set algorithmic features</option> </param> <when value="set"> - <param name="report_genotype_likelihood_max" argument="--report-genotype-likelihood-max" type="boolean" truevalue="--report-genotype-likelihood-max" falsevalue="" checked="false" + <param argument="--report-genotype-likelihood-max" type="boolean" truevalue="--report-genotype-likelihood-max" falsevalue="" checked="false" label="Report genotypes using the maximum-likelihood estimate provided from genotype likelihoods" /> <param name="B" argument="--genotyping-max-iterations" type="integer" value="1000" label="Iterate no more than N times during genotyping step" /> - <param name="genotyping_max_banddepth" argument="--genotyping-max-banddepth" type="integer" value="6" + <param argument="--genotyping-max-banddepth" type="integer" value="6" label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" /> <param name="W" argument="--posterior-integration-limits" type="text" value="1,3" label="Integrate all genotype combinations in our posterior space which include no more than N (1) samples with their Mth (3) best data likelihood" /> <param name="N" argument="--exclude-unobserved-genotypes" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="false" label="Skip sample genotypings for which the sample has no supporting reads" /> - <param name="genotype_variant_threshold" argument="--genotype-variant-threshold" type="integer" value="" optional="true" + <param argument="--genotype-variant-threshold" type="integer" value="" optional="true" label="Limit posterior integration to samples where the second-best genotype likelihood is no 
more than log(N) from the highest genotype likelihood for the sample" help="default=~unbounded" /> <param name="j" argument="--use-mapping-quality" type="boolean" truevalue="-j" falsevalue="" checked="false" @@ -598,7 +611,7 @@ help="By default, FreeBayes uses a minimum Base Quality in flanking sequence" /> <param name="D" argument="--read-dependence-factor" type="float" value="0.9" label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" /> - <param name="genotype_qualities" argument="--genotype-qualities" type="boolean" truevalue="--genotype-qualities" falsevalue="" checked="false" + <param argument="--genotype-qualities" type="boolean" truevalue="--genotype-qualities" falsevalue="" checked="false" label="Calculate the marginal probability of genotypes and report as GQ in each sample field in the VCF output" /> </when> <when value="do_not_set" /> @@ -609,6 +622,18 @@ <when value="naive" /> <when value="naive_w_filters" /> </conditional> + <conditional name="output_options"> + <param name="flavor" type="select" label="Type of main output to produce" help="The tool will, by default, produce VCF output with information about sites with called variants. If you want also information (such as depth of coverage) about non-called sites, you can use the gVCF or gVCF with custom block size options. The first collapses the stats of entire blocks of consecutive non-called sites into one non-call record. 
The second gives you control over how many consecutive non-called sites should be combined into a non-call record."> + <option value="vcf">VCF (default)</option> + <option value="gvcf">gVCF (--gvcf)</option> + <option value="gvcf_custom">gVCF with custom block size</option> + </param> + <when value="vcf" /> + <when value="gvcf" /> + <when value="gvcf_custom"> + <param name="gvcf_block_size" type="integer" value="0" min="0" max="2147483647" label="Maximal block size for consecutive non-called sites" help="Consolidate data from any non-called site together with the N next non-called sites into one gVCF record, i.e. produce gVCF block records of maximally N+1 sites. Blocks can be shorter if terminated by the end of a chromosome or a site with a called variant. Setting this parameter to 0 produces a separate record for every non-called site, but can result in very large output." /> + </when> + </conditional> </inputs> <outputs> <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (variants)" /> @@ -620,7 +645,7 @@ </data> </outputs> <tests> - <test> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history" /> <param name="processmode" value="individual" /> <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> @@ -628,7 +653,31 @@ <param name="options_type_selector" value="simple"/> <output name="output_vcf" file="freebayes-phix174-test1.vcf" lines_diff="4" /> </test> - <test> + <test expect_num_outputs="1"> + <param name="reference_source_selector" value="history" /> + <param name="processmode" value="individual" /> + <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta" /> + <param name="input_bams" ftype="bam" value="freebayes-phix174.bam" /> + <param name="options_type_selector" value="simple" /> + <conditional name="output_options"> + <param name="flavor" value="gvcf" /> + </conditional> + <output name="output_vcf" file="freebayes-phix174.gvcf" lines_diff="4" /> + </test> + <test 
expect_num_outputs="1"> + <param name="reference_source_selector" value="history" /> + <param name="processmode" value="individual" /> + <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta" /> + <param name="input_bams" ftype="bam" value="freebayes-phix174.bam" /> + <param name="options_type_selector" value="simple" /> + <conditional name="output_options"> + <param name="flavor" value="gvcf_custom" /> + </conditional> + <!-- This test produces one record per reference position + so the test file only contains the first part of the expected output up to the second variant site --> + <output name="output_vcf" file="freebayes-phix174.full.sample.gvcf" compare="contains" lines_diff="2" /> + </test> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history" /> <param name="processmode" value="individual" /> <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> @@ -639,7 +688,7 @@ <output name="output_vcf" file="freebayes-phix174-test2.vcf" lines_diff="4" /> </test> <!-- Test that user-provided (variant-input option) sites are included in output --> - <test> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history" /> <param name="processmode" value="individual" /> <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> @@ -658,7 +707,7 @@ </assert_contents> </output> </test> - <test> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history" /> <param name="processmode" value="individual" /> <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/> @@ -669,7 +718,7 @@ <param name="trim_complex_tail" value="--trim-complex-tail"/> <output name="output_vcf" file="freebayes-phix174-test4.vcf" lines_diff="4" /> </test> - <test> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history" /> <param name="processmode" value="individual" /> <param name="ref_file" ftype="fasta" 
value="freebayes-hxb2.fasta"/> @@ -679,7 +728,7 @@ <param name="min_coverage" value="250" /> <output name="output_vcf" file="freebayes-hxb2-test5.vcf" lines_diff="4" /> </test> - <test> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history" /> <param name="processmode" value="individual" /> <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/> @@ -689,7 +738,7 @@ <param name="limit_coverage" value="400" /> <output name="output_vcf" file="freebayes-hxb2-test6.vcf" lines_diff="4" /> </test> - <test> + <test expect_num_outputs="1"> <param name="reference_source_selector" value="history" /> <param name="processmode" value="individual" /> <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/> @@ -699,7 +748,7 @@ <param name="skip_coverage" value="100" /> <output name="output_vcf" file="freebayes-hxb2-test7.vcf" lines_diff="4" /> </test> - <test> <!-- Test with CRAM --> + <test expect_num_outputs="1"> <!-- Test with CRAM --> <param name="reference_source_selector" value="history" /> <param name="processmode" value="individual" /> <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
