diff freebayes.xml @ 40:0d8581534009 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/freebayes commit 48700fed491e8056cddd1ee7d8bc9ca08e19fe8d
author iuc
date Tue, 05 Aug 2025 13:47:58 +0000
parents 85dc96ebc770
children
line wrap: on
line diff
--- a/freebayes.xml	Tue Jun 10 07:59:51 2025 +0000
+++ b/freebayes.xml	Tue Aug 05 13:47:58 2025 +0000
@@ -1,14 +1,15 @@
-<tool id="freebayes" name="FreeBayes" version="@TOOL_VERSION@+galaxy0">
+<tool id="freebayes" name="FreeBayes" version="@TOOL_VERSION@+galaxy1" profile="23.0">
     <description>bayesian genetic variant detector</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
     <xrefs>
         <xref type="bio.tools">freebayes</xref>
     </xrefs>
-    <macros>
-        <import>macros.xml</import>
-    </macros>
     <expand macro="requirements">
+        <requirement type="package" version="3.11">grep</requirement>
         <requirement type="package" version="5.3.1">gawk</requirement>
-        <requirement type="package" version="20250422">parallel</requirement>
+        <requirement type="package" version="20250622">parallel</requirement>
     </expand>
     <expand macro="version_command" />
     <command detect_errors="exit_code"><![CDATA[
@@ -239,6 +240,18 @@
             #end if
         #end if
 
+        #if str($output_options.flavor) == "gvcf":
+          --gvcf
+        #elif str($output_options.flavor) == "gvcf_custom":
+          --gvcf
+          #if $output_options.gvcf_block_size == 0:
+            ## block size 0 = one record per non-called site; use the dedicated flag instead of --gvcf-chunk 0 (special-cased for performance)
+            --gvcf-dont-use-chunk t
+          #else:
+            --gvcf-chunk $output_options.gvcf_block_size
+          #end if
+        #end if
+
         ";
     done > freebayes_commands.sh &&
 
@@ -366,21 +379,21 @@
                             <when value="provide_vcf">
                                 <param name="input_variant_vcf" argument="--variant-input" type="data" format="vcf_bgzip"
                                        label="Use variants reported in this VCF dataset as input to the algorithm"/>
-                                <param name="only_use_input_alleles" argument="--only-use-input-alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="false"
+                                <param argument="--only-use-input-alleles" type="boolean" truevalue="--only-use-input-alleles" falsevalue="" checked="false"
                                        label="Only provide variant calls and genotype likelihoods for sites in VCF" />
                             </when>
                         </conditional>
-                        <param name="haplotype_basis_alleles" argument="--haplotype-basis-alleles" type="data" format="vcf" optional="true"
+                        <param argument="--haplotype-basis-alleles" type="data" format="vcf" optional="true"
                                label="Only use variant alleles provided in this input VCF for the construction of complex or haplotype alleles" />
-                        <param name="report_monomorphic" argument="--report-monomorphic" type="boolean" truevalue="--report-monomorphic" falsevalue="" checked="false"
+                        <param argument="--report-monomorphic" type="boolean" truevalue="--report-monomorphic" falsevalue="" checked="false"
                                label="Report even loci which appear to be monomorphic, and report all considered alleles, even those which are not in called genotypes" />
-                        <param name="observation_bias" argument="--observation-bias" type="data" format="tabular" optional="true"
+                        <param argument="--observation-bias" type="data" format="tabular" optional="true"
                                label="Load read length-dependent allele observation biases from"
                                help="The format is [length] [alignment efficiency relative to reference] where the efficiency is 1 if there is no relative observation bias" />
-                        <param name="contamination_estimates" argument="--contamination-estimates" type="data" format="tabular" optional="true"
+                        <param argument="--contamination-estimates" type="data" format="tabular" optional="true"
                                label="Upload per-sample estimates of contamination from"
                                help="The format should be: sample p(read=R|genotype=AR) p(read=A|genotype=AA) Sample '*' can be used to set default contamination estimates" />
-                        <param name="trim_complex_tail" argument="--trim-complex-tail" type="boolean" truevalue="--trim-complex-tail" falsevalue=""
+                        <param argument="--trim-complex-tail" type="boolean" truevalue="--trim-complex-tail" falsevalue=""
                                label="Trim trailing reference matches" />
                     </when>
                     <when value="do_not_set" />
@@ -432,7 +445,7 @@
                     <when value="set">
                         <param name="Z" argument="--use-reference-allele" type="boolean" truevalue="-Z" falsevalue="" checked="false"
                                label="Include the reference allele in the analysis as if it is another sample from the same population" />
-                        <param name="reference_quality" argument="--reference-quality" type="text" value="100,60"
+                        <param argument="--reference-quality" type="text" value="100,60"
                                label="Assign mapping quality of MQ (100) to the reference allele at each site and base quality of BQ (60)" />
                     </when>
                     <when value="do_not_set" />
@@ -457,13 +470,13 @@
                         <param name="n" argument="--use-best-n-alleles" type="integer" value="0"
                                label="How many best SNP alleles to evaluate"
                                help="Alleles are ranked by the sum of supporting quality scores. Set to 0 to evaluate all" />
-                        <param name="haplotype_length" argument="--haplotype-length" type="integer" value="3"
+                        <param argument="--haplotype-length" type="integer" value="3"
                                label="Allow haplotype calls with contiguous embedded matches of up to (nucleotides)" />
                         <param name="min_repeat_length" argument="--min-repeat-size" type="integer" value="5"
                                label="When assembling observations across repeats, require the total repeat length at least this many bp" />
-                        <param name="min_repeat_entropy" argument="--min-repeat-entropy" type="integer" value="1"
+                        <param argument="--min-repeat-entropy" type="integer" value="1"
                                label="To detect interrupted repeats, build across sequence until it has entropy > (bits per bp)" />
-                        <param name="no_partial_observations" argument="--no-partial-observations" type="boolean" truevalue="--no-partial-observations" falsevalue="" checked="false"
+                        <param argument="--no-partial-observations" type="boolean" truevalue="--no-partial-observations" falsevalue="" checked="false"
                                label="Exclude observations which do not fully span the dynamically-determined detection window"
                                help="By default, FreeBayes uses all observations, dividing partial support across matching haplotypes when generating haplotypes" />
                     </when>
@@ -508,7 +521,7 @@
                                        help="default=~unbounded" />
                                 <param argument="--read-max-mismatch-fraction" type="float" value="1.0" min="0.0" max="1.0"
                                        label="Exclude reads with more than N [0,1] fraction of mismatches where each mismatch has base quality >= mismatch-base-quality-threshold (second option above)" />
-                                <param name="read_snp_limit" argument="--read-snp-limit" type="integer" value="1000"
+                                <param argument="--read-snp-limit" type="integer" value="1000"
                                        label="Exclude reads with more than N base mismatches, ignoring gaps with quality >= mismatch-base-quality-threshold (third option above)"
                                        help="default=~unbounded" />
                             </when>
@@ -559,12 +572,12 @@
                         <option value="set">Set genotype likelihood options</option>
                     </param>
                     <when value="set">
-                        <param name="base_quality_cap" argument="--base-quality-cap" type="integer" value="0"
+                        <param argument="--base-quality-cap" type="integer" value="0"
                                label="Limit estimated observation quality by capping base quality at" />
-                        <param name="experimental_gls" argument="--experimental-gls" type="boolean" truevalue="--experimental-gls" falsevalue="" checked="false"
+                        <param argument="--experimental-gls" type="boolean" truevalue="--experimental-gls" falsevalue="" checked="false"
                                label="Generate genotype likelihoods using 'effective base depth' metric qual = 1-BaseQual * 1-MapQual"
                                help="Incorporate partial observations. This is the default when contamination estimates are provided. Optimized for diploid samples" />
-                        <param name="prob_contamination" argument="--prob-contamination" type="float" value="10e-9"
+                        <param argument="--prob-contamination" type="float" value="10e-9"
                                label="An estimate of contamination to use for all samples" />
                     </when>
                     <when value="do_not_set" />
@@ -578,17 +591,17 @@
                         <option value="set">Set algorithmic features</option>
                     </param>
                     <when value="set">
-                        <param name="report_genotype_likelihood_max" argument="--report-genotype-likelihood-max" type="boolean" truevalue="--report-genotype-likelihood-max" falsevalue="" checked="false"
+                        <param argument="--report-genotype-likelihood-max" type="boolean" truevalue="--report-genotype-likelihood-max" falsevalue="" checked="false"
                                label="Report genotypes using the maximum-likelihood estimate provided from genotype likelihoods" />
                         <param name="B" argument="--genotyping-max-iterations" type="integer" value="1000"
                                label="Iterate no more than N times during genotyping step" />
-                        <param name="genotyping_max_banddepth" argument="--genotyping-max-banddepth" type="integer" value="6"
+                        <param argument="--genotyping-max-banddepth" type="integer" value="6"
                                label="Integrate no deeper than the Nth best genotype by likelihood when genotyping" />
                         <param name="W" argument="--posterior-integration-limits" type="text" value="1,3"
                                label="Integrate all genotype combinations in our posterior space which include no more than N (1) samples with their Mth (3) best data likelihood" />
                         <param name="N" argument="--exclude-unobserved-genotypes" type="boolean" truevalue="--exclude-unobserved-genotypes" falsevalue="" checked="false"
                                label="Skip sample genotypings for which the sample has no supporting reads" />
-                        <param name="genotype_variant_threshold" argument="--genotype-variant-threshold" type="integer" value="" optional="true"
+                        <param argument="--genotype-variant-threshold" type="integer" value="" optional="true"
                                label="Limit posterior integration to samples where the second-best genotype likelihood is no more than log(N) from the highest genotype likelihood for the sample"
                                help="default=~unbounded" />
                         <param name="j" argument="--use-mapping-quality" type="boolean" truevalue="-j" falsevalue="" checked="false"
@@ -598,7 +611,7 @@
                                help="By default, FreeBayes uses a minimum Base Quality in flanking sequence" />
                         <param name="D" argument="--read-dependence-factor" type="float" value="0.9"
                                label="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations" />
-                        <param name="genotype_qualities" argument="--genotype-qualities" type="boolean" truevalue="--genotype-qualities" falsevalue="" checked="false"
+                        <param argument="--genotype-qualities" type="boolean" truevalue="--genotype-qualities" falsevalue="" checked="false"
                                label="Calculate the marginal probability of genotypes and report as GQ in each sample field in the VCF output" />
                     </when>
                     <when value="do_not_set" />
@@ -609,6 +622,18 @@
             <when value="naive" />
             <when value="naive_w_filters" />
         </conditional>
+        <conditional name="output_options">
+            <param name="flavor" type="select" label="Type of main output to produce" help="The tool will, by default, produce VCF output with information about sites with called variants. If you also want information (such as depth of coverage) about non-called sites, you can use the gVCF or gVCF with custom block size options. The first collapses the stats of entire blocks of consecutive non-called sites into one non-call record. The second gives you control over how many consecutive non-called sites should be combined into a non-call record.">
+                <option value="vcf">VCF (default)</option>
+                <option value="gvcf">gVCF (--gvcf)</option>
+                <option value="gvcf_custom">gVCF with custom block size</option>
+            </param>
+            <when value="vcf" /> <!-- plain VCF: the command template adds no gVCF flags -->
+            <when value="gvcf" /> <!-- gVCF with the default block handling: only the gvcf flag is added -->
+            <when value="gvcf_custom">
+                <param name="gvcf_block_size" type="integer" value="0" min="0" max="2147483647" label="Maximal block size for consecutive non-called sites" help="Consolidate data from any non-called site together with the N next non-called sites into one gVCF record, i.e. produce gVCF block records of maximally N+1 sites. Blocks can be shorter if terminated by the end of a chromosome or a site with a called variant. Setting this parameter to 0 produces a separate record for every non-called site, but can result in very large output." />
+            </when>
+        </conditional>
     </inputs>
     <outputs>
         <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (variants)" />
@@ -620,7 +645,7 @@
         </data>
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="1">
             <param name="reference_source_selector" value="history" />
             <param name="processmode" value="individual" />
             <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
@@ -628,7 +653,31 @@
             <param name="options_type_selector" value="simple"/>
             <output name="output_vcf" file="freebayes-phix174-test1.vcf" lines_diff="4" />
         </test>
-        <test>
+        <test expect_num_outputs="1"> <!-- Test gVCF output (flavor "gvcf") -->
+            <param name="reference_source_selector" value="history" />
+            <param name="processmode" value="individual" />
+            <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta" />
+            <param name="input_bams" ftype="bam" value="freebayes-phix174.bam" />
+            <param name="options_type_selector" value="simple" />
+            <conditional name="output_options">
+                <param name="flavor" value="gvcf" />
+            </conditional>
+            <output name="output_vcf" file="freebayes-phix174.gvcf" lines_diff="4" />
+        </test>
+        <test expect_num_outputs="1">
+            <param name="reference_source_selector" value="history" />
+            <param name="processmode" value="individual" />
+            <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta" />
+            <param name="input_bams" ftype="bam" value="freebayes-phix174.bam" />
+            <param name="options_type_selector" value="simple" />
+            <conditional name="output_options">
+                <param name="flavor" value="gvcf_custom" />
+            </conditional>
+            <!-- gvcf_block_size is left at its default of 0, so this test produces one record per reference position;
+            the test file therefore only contains the first part of the expected output, up to the second variant site -->
+            <output name="output_vcf" file="freebayes-phix174.full.sample.gvcf" compare="contains" lines_diff="2" />
+        </test>
+        <test expect_num_outputs="1">
             <param name="reference_source_selector" value="history" />
             <param name="processmode" value="individual" />
             <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
@@ -639,7 +688,7 @@
             <output name="output_vcf" file="freebayes-phix174-test2.vcf" lines_diff="4" />
         </test>
         <!-- Test that user-provided (variant-input option) sites are included in output -->
-        <test>
+        <test expect_num_outputs="1">
             <param name="reference_source_selector" value="history" />
             <param name="processmode" value="individual" />
             <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
@@ -658,7 +707,7 @@
                 </assert_contents>
             </output>
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="reference_source_selector" value="history" />
             <param name="processmode" value="individual" />
             <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>
@@ -669,7 +718,7 @@
             <param name="trim_complex_tail" value="--trim-complex-tail"/>
             <output name="output_vcf" file="freebayes-phix174-test4.vcf" lines_diff="4" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="reference_source_selector" value="history" />
             <param name="processmode" value="individual" />
             <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/>
@@ -679,7 +728,7 @@
             <param name="min_coverage" value="250" />
             <output name="output_vcf" file="freebayes-hxb2-test5.vcf" lines_diff="4" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="reference_source_selector" value="history" />
             <param name="processmode" value="individual" />
             <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/>
@@ -689,7 +738,7 @@
             <param name="limit_coverage" value="400" />
             <output name="output_vcf" file="freebayes-hxb2-test6.vcf" lines_diff="4" />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="reference_source_selector" value="history" />
             <param name="processmode" value="individual" />
             <param name="ref_file" ftype="fasta" value="freebayes-hxb2.fasta"/>
@@ -699,7 +748,7 @@
             <param name="skip_coverage" value="100" />
             <output name="output_vcf" file="freebayes-hxb2-test7.vcf" lines_diff="4" />
         </test>
-        <test> <!-- Test with CRAM -->
+        <test expect_num_outputs="1"> <!-- Test with CRAM -->
             <param name="reference_source_selector" value="history" />
             <param name="processmode" value="individual" />
             <param name="ref_file" ftype="fasta" value="freebayes-phix174.fasta"/>