changeset 11:5a326a6fa105 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
author artbio
date Fri, 17 Oct 2025 17:21:17 +0000
parents 8711df965d4b
children
files lumpy_smoove.xml macro_lumpy_smoove.xml macros.xml test-data/result-1.vcf test-data/result-2.vcf test-data/result-3.vcf test-data/result-4.vcf test-data/result-5.vcf test-data/result-6.vcf test-data/result_cohort.vcf test-data/result_paired.vcf test-data/result_single.vcf test-data/test_vcf2hrdetect.tab vcf2hrdetect.py vcf2hrdetect.xml
diffstat 15 files changed, 702 insertions(+), 776 deletions(-) [+]
line wrap: on
line diff
--- a/lumpy_smoove.xml	Wed Jan 24 19:26:57 2024 +0000
+++ b/lumpy_smoove.xml	Fri Oct 17 17:21:17 2025 +0000
@@ -1,92 +1,97 @@
-<tool id="lumpy_smoove" name="lumpy_smoove" version="0.2.8+galaxy1">
+<tool id="lumpy_smoove" name="lumpy_smoove" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@">
     <description>find structural variants using the smoove workflow</description>
     <macros>
-        <import>macro_lumpy_smoove.xml</import>
+        <import>macros.xml</import>
     </macros>
-    <requirements>
-        <requirement type="package" version="0.7.1">svtyper</requirement>
-        <requirement type="package" version="0.2.8">smoove</requirement>
-    </requirements>
-    <stdio>
-        <exit_code range="1:" level="fatal" description="Tool exception" />
-    </stdio>
+    <expand macro="lumpy_smoove_requirements" />
+    <expand macro="stdio" />
     <command detect_errors="exit_code"><![CDATA[
     @pipefail@
     @set_fasta_index@
+
     #if $set_plan.plan_choice=='pair':
-        ln -s $set_plan.normal_bam normal.bam &&
-        ln -f -s $set_plan.normal_bam.metadata.bam_index normal.bam.bai &&
-        ln -s $set_plan.tumor_bam tumor.bam &&
-        ln -f -s $set_plan.tumor_bam.metadata.bam_index tumor.bam.bai &&
+        ln -s '$set_plan.normal_bam' normal.bam &&
+        ln -f -s '$set_plan.normal_bam.metadata.bam_index' normal.bam.bai &&
+        ln -s '$set_plan.tumor_bam' tumor.bam &&
+        ln -f -s '$set_plan.tumor_bam.metadata.bam_index' tumor.bam.bai &&
     #elif $set_plan.plan_choice=='single':
-        ln -s $set_plan.single_bam single.bam &&
-        ln -f -s $set_plan.single_bam.metadata.bam_index single.bam.bai &&
+        ln -s '$set_plan.single_bam' single.bam &&
+        ln -f -s '$set_plan.single_bam.metadata.bam_index' single.bam.bai &&
     #else:
         #for $sample in $set_plan.cohort:
-            ln -s $sample ${sample.element_identifier}.bam &&
-            ln -f -s $sample.metadata.bam_index ${sample.element_identifier}.bam.bai &&
+            ln -s '$sample' '${sample.element_identifier}.bam' &&
+            ln -f -s '$sample.metadata.bam_index' '${sample.element_identifier}.bam.bai' &&
         #end for
     #end if
-   
+
     smoove call --name output
         #if $set_exclusion.choices=="yes":
-            --exclude $bedmask
+            --exclude '$bedmask'
         #end if
             --fasta reference.fa
             --processes \${GALAXY_SLOTS:-4}
             --genotype
-       #if $prpos=="no":
+       #if $removepr:
             --removepr
        #end if
-           *.bam &&
-    gunzip -c output-smoove.genotyped.vcf.gz > $vcf_call
+        *.bam &&
+    gunzip -c output-smoove.genotyped.vcf.gz > '$vcf_call'
 
     ]]></command>
     <inputs>
         <expand macro="reference_source_conditional" />
         <conditional name="set_plan">
-            <param name="plan_choice" type="select" label="Analyse a single Bam or a pair of Bam (eg normal/tumor)" display="radio">
-                <option value="pair" selected="true">A pair of Bam files</option>
-                <option value="single">A single Bam</option>
-                <option value="cohort">a small cohort of Bam files (less than ~40)</option>
+            <param name="plan_choice" type="select" label="Analysis mode" display="radio">
+                <option value="pair" selected="true">A pair of BAM files (e.g., normal/tumor)</option>
+                <option value="single">A single BAM file</option>
+                <option value="cohort">A small cohort of BAM files (less than ~40)</option>
             </param>
             <when value="pair">
-                <param format="bam" name="normal_bam" type="data" label="BAM alignment from the normal sample"/>
-                <param format="bam" name="tumor_bam" type="data" label="BAM alignment from the tumor sample"/>
+                <param format="bam" name="normal_bam" type="data" label="Normal/Reference sample BAM" />
+                <param format="bam" name="tumor_bam" type="data" label="Tumor/Case sample BAM" />
             </when>
             <when value="single">
-                <param format="bam" name="single_bam" type="data" label="BAM alignment from a single sample"/>
+                <param format="bam" name="single_bam" type="data" label="Single sample BAM file" />
             </when>
             <when value="cohort">
-                <param name="cohort" type="data_collection" format="bam" label="A collection of bam files" multiple="true"/>
+                <param name="cohort" type="data_collection" format="bam" label="A collection of BAM files" />
             </when>
-        </conditional>      
+        </conditional>
 
- 
         <conditional name="set_exclusion">
-            <param name="choices" type="select" label="exclude regions with a bed file" display="radio">
+            <param name="choices" type="select" label="Exclude genomic regions?" display="radio">
                 <option value="no" selected="true">No</option>
                 <option value="yes">Yes</option>
             </param>
             <when value="yes">
-                <param format="bed" name="bedmask" type="data" label="BED regions to be excluded for the analysis"/>
-            </when>
-            <when value="no">
+                <param format="bed" name="bedmask" type="data" label="BED file with regions to exclude" help="This is highly recommended to improve specificity. See help section" />
             </when>
-        </conditional>      
-        <param name="prpos" type="select" label="include the PRPOS probabilities in INFO tags" display="radio">
-            <option value="no" selected="true">No</option>
-            <option value="yes">Yes</option>
-        </param>
-   </inputs>
+            <when value="no" />
+        </conditional>
+        <param name="removepr" type="boolean" checked="true" truevalue="--removepr" falsevalue="" label="Do not include PRPOS probabilities in INFO tags" help="Use this advanced option to exclude the PRPOS field, which can reduce VCF file size." />
+    </inputs>
 
     <outputs>
         <data format="vcf" name="vcf_call" label="lumpy-smoove Variant Calling" />
     </outputs>
 
     <tests>
-        <test>
-
+        <test expect_num_outputs="1">
+            <conditional name="set_plan">
+                <param name="plan_choice" value="pair"/>
+                <param name="normal_bam" value="celegans_RG_1.bam" ftype="bam"/>
+                <param name="tumor_bam" value="celegans_RG_2.bam" ftype="bam"/>
+            </conditional>
+            <param name="reference_source|reference_source_selector" value="history" />
+            <param name="reference_source|ref_file" value="chrI-ce11.fa" ftype="fasta"/>
+            <conditional name="set_exclusion">
+                <param name="choices" value="yes"/>
+                <param name="bedmask" value="exclude.bed" ftype="bed"/>
+            </conditional>
+            <param name="removepr" value="true"/>
+            <output name="vcf_call" file="result_paired.vcf" ftype="vcf" lines_diff="12"/>
+        </test>
+        <test expect_num_outputs="1">
             <conditional name="set_plan">
                 <param name="plan_choice" value="cohort"/>
                 <param name="cohort">
@@ -96,156 +101,65 @@
                     </collection>
                 </param>
             </conditional>
-            <param name="reference_source_selector" value="history" />
-            <param name="ref_file" value="chrI-ce11.fa"/>
-            <param name="choices" value="yes"/>
-            <param name="bedmask" value="exclude.bed"/>
-            <param name="prpos" value="no"/>
-            <output name="vcf_call" ftype="vcf" file="result-6.vcf" lines_diff="12"/>
-        </test>
-        <test>
-            <param name="reference_source_selector" value="history" />
-            <param name="ref_file" value="chrI-ce11.fa"/>
-            <param name="normal_bam" value="celegans_RG_1.bam"/>
-            <param name="tumor_bam" value="celegans_RG_2.bam"/>
-            <param name="choices" value="yes"/>
-            <param name="bedmask" value="exclude.bed"/>
-            <param name="prpos" value="no"/>
-            <output name="vcf_call" ftype="vcf" file="result-1.vcf" lines_diff="12"/>
-        </test>
-        <test>
-            <param name="reference_source_selector" value="history" />
-            <param name="ref_file" value="chrI-ce11.fa"/>
-            <param name="normal_bam" value="celegans_RG_1.bam"/>
-            <param name="tumor_bam" value="celegans_RG_2.bam"/>
-            <param name="choices" value="no"/>
-            <param name="prpos" value="no"/>
-            <output name="vcf_call" ftype="vcf" file="result-2.vcf" lines_diff="12"/>
+            <param name="reference_source|reference_source_selector" value="history" />
+            <param name="reference_source|ref_file" value="chrI-ce11.fa" ftype="fasta"/>
+            <conditional name="set_exclusion">
+                <param name="choices" value="no"/>
+            </conditional>
+            <param name="removepr" value="false"/>
+            <output name="vcf_call" file="result_cohort.vcf" ftype="vcf" lines_diff="12"/>
         </test>
-        <test>
-            <param name="reference_source_selector" value="history" />
-            <param name="ref_file" value="chrI-ce11.fa"/>
-            <param name="normal_bam" value="celegans_RG_2.bam"/>
-            <param name="tumor_bam" value="celegans_RG_1.bam"/>
-            <param name="choices" value="no"/>
-            <param name="prpos" value="no"/>
-            <output name="vcf_call" ftype="vcf" file="result-3.vcf" lines_diff="12"/>
-        </test>
-        <test>
-            <param name="reference_source_selector" value="history" />
-            <param name="ref_file" value="chrI-ce11.fa"/>
-            <param name="normal_bam" value="celegans_RG_1.bam"/>
-            <param name="tumor_bam" value="celegans_RG_2.bam"/>
-            <param name="choices" value="no"/>
-            <param name="prpos" value="yes"/>
-            <output name="vcf_call" ftype="vcf" file="result-4.vcf" lines_diff="12"/>
-        </test>
-        <test>
-            <param name="reference_source_selector" value="history" />
-            <param name="plan_choice" value="single" />
-            <param name="ref_file" value="chrI-ce11.fa"/>
-            <param name="single_bam" value="celegans_RG_1.bam"/>
-            <param name="choices" value="no"/>
-            <param name="prpos" value="no"/>
-            <output name="vcf_call" ftype="vcf" file="result-5.vcf" lines_diff="12"/>
+        <test expect_num_outputs="1">
+            <conditional name="set_plan">
+                <param name="plan_choice" value="single"/>
+                <param name="single_bam" value="celegans_RG_1.bam" ftype="bam"/>
+            </conditional>
+            <param name="reference_source|reference_source_selector" value="history" />
+            <param name="reference_source|ref_file" value="chrI-ce11.fa" ftype="fasta"/>
+            <conditional name="set_exclusion">
+                <param name="choices" value="no"/>
+            </conditional>
+            <param name="removepr" value="false"/>
+            <output name="vcf_call" file="result_single.vcf" ftype="vcf" lines_diff="12"/>
         </test>
     </tests>
 
-    <help>
-**smoove** simplifies and speeds up SV calling and genotyping for short reads. It also improves
-specificity by removing many spurious alignment signals that are indicative of low-level
-noise and often contribute to spurious calls.
-
-There is a blog-post describing smoove in more detail
-here: https://brentp.github.io/post/smoove/
+    <help><![CDATA[
+**What it does**
 
-Currently, this Galaxy tool only wraps smoove for 1, 2 (bam normal and tumor inputs) or
-a small collection of samples (&lt;40),
-which translates in the command line::
-
-    <![CDATA[smoove call --name my-cohort --exclude $bed --fasta $fasta -p $threads --genotype [--removepr] /path/to/*.bam]]>
-
+**smoove** simplifies and speeds up Structural Variant (SV) calling and genotyping for short reads. It improves specificity by removing many spurious alignment signals that are indicative of low-level noise and often contribute to spurious calls.
 
-the --exclude $bed is highly recommended as it can be used to ignore reads that overlap
-problematic regions.
+This Galaxy tool wraps `smoove` for single samples, pairs (e.g., normal/tumor), or small cohorts (<40 samples).
 
-A good set of regions for GRCh37 can be found here_
-
-.. _here: https://github.com/hall-lab/speedseq/blob/master/annotations/ceph18.b37.lumpy.exclude.2014-01-15.bed
-
+**Why exclude regions?**
 
-And a good set for GRCh38 can be found there_
+The accuracy of SV detection can be greatly improved by excluding problematic regions of the genome. These regions often have highly repetitive sequences (like centromeres and telomeres) or are naturally highly variable (like immunoglobulin gene regions). Reads aligning to these areas can create ambiguous signals, leading to false-positive SV calls. Using a "blacklist" BED file is therefore highly recommended.
 
-.. _there: https://github.com/hall-lab/speedseq/blob/master/annotations/exclude.cnvnator_100bp.GRCh38.20170403.bed
-
-
-smoove will::
+**Recommended Exclusion File**
 
-    1. parallelize calls to lumpy_filter to extract split and discordant reads required by lumpy
-    
-    2. further filter lumpy_filter calls to remove high-coverage, spurious regions and user-specified chroms like 'hs37d5';
-    it will also remove reads that we've found are likely spurious signals. after this, it will
-    remove singleton reads (where the mate was removed by one of the previous filters)
-    from the discordant bams. This makes lumpy much faster and less memory-hungry.
-    
-    3. calculate per-sample metrics for mean, standard deviation, and distribution of insert
-    size as required by lumpy.
-    
-    4. stream output of lumpy directly into multiple svtyper processes for parallel-by-region
-    genotyping while lumpy is still running.
-    
-    5. sort, compress, and index final VCF (but this galaxy wrapper is uncompression the gzip_vcf output)
+Although the file below was generated a few years ago, it is still appropriate: exclusion regions are tied to the genome assembly version, not to the date the file was made.
 
-**Input(s)**
+- **GRCh38 / hg38**: https://github.com/hall-lab/speedseq/blob/master/annotations/exclude.cnvnator_100bp.GRCh38.20170403.bed
+
+**Inputs**
 
-* BAM files: Either a pair of Bam files (e.g. normal vs tumor sample) or a single Bam file.
-  Only BAM alignments produced by BWA-mem have been tested with this tool
-    
-    .. class:: warningmark
-    
-    It is mandatory for proper run of svtyper that **BAM files contain read group information**,
-    ie the @RG tag is present and filled in each BAM file
-
+- **BAM files**: The tool accepts single, paired, or a collection of BAM files.
+- **Reference Genome**: You must provide the reference genome that was used for the alignment.
+- **Exclusion BED (Optional, but recommended)**: A BED file specifying regions to ignore during the analysis.
 
-* A bed file describing the regions to exclude from the analysis
-* Additional options*: refer to smoove GitHub repository_ and the lumpy publication (doi 10.1186/gb-2014-15-6-r84)
-
-.. _repository: https://github.com/brentp/smoove
-
+.. class:: warningmark
 
-Options::
+It is mandatory for a proper run that **BAM files contain read group information** (i.e., the @RG tag must be present in the BAM header).
 
-    <![CDATA[
-
-    smoove calls several programs. Those with 'Y' are found on your $PATH. Only those with '*' are required.
+**How it works**
 
-  [Y] bgzip [ sort   -> (compress) ->   index ]
-  [Y] gsort [(sort)  ->  compress   ->  index ]
-  [Y] tabix [ sort   ->  compress   -> (index)]
-  [Y] lumpy
-  [Y] lumpy_filter
-  [Y] samtools
-  [Y] svtyper
-  [Y] mosdepth [extra filtering of split and discordant files for better scaling]
+`smoove` orchestrates a workflow that involves several tools. It intelligently parallelizes `lumpy` to detect breakpoints and `svtyper` to genotype the identified variants, resulting in a final VCF file.
 
-  [Y] duphold [(optional) annotate calls with depth changes]
-  [Y] svtools [only needed for large cohorts].
-
-    Available sub-commands are below. Each can be run with -h for additional help.
+**smoove GitHub**
 
- call        : call lumpy (and optionally svtyper)
- merge       : merge and sort (using svtools) calls from multiple samples
- genotype    : parallelize svtyper on an input VCF
- paste       : square final calls from multiple samples (each with same number of variants)
- plot-counts : plot counts of split, discordant reads before, after smoove filtering
- annotate    : annotate a VCF with gene and quality of SV call
- hipstr      : run hipSTR in parallel
- cnvnator    : run cnvnator in parallel
- duphold     : run duphold in parallel (this can be done by adding a flag to call or genotype)
-    ]]>
-    </help>
-
+For more detailed information, please refer to the `smoove` GitHub repository: https://github.com/brentp/smoove
+    ]]></help>
     <citations>
-    <citation type="doi">10.1186/gb-2014-15-6-r84</citation>
-  </citations>
+        <citation type="doi">10.1186/gb-2014-15-6-r84</citation>
+    </citations>
 </tool>
--- a/macro_lumpy_smoove.xml	Wed Jan 24 19:26:57 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-<macros>
-    <token name="@pipefail@"><![CDATA[set -o | grep -q pipefail && set -o pipefail;]]></token>
-
-    <token name="@set_fasta_index@"><![CDATA[
-    #if str( $reference_source.reference_source_selector ) == "history":
-        ln -s '${reference_source.ref_file}' reference.fa &&
-        samtools faidx 'reference.fa' 2>&1 || echo "Error running samtools faidx for lumpy_smoove" >&2 &&
-    #else:
-        ln -s '${reference_source.index.fields.path}' reference.fa &&
-        ln -s '${reference_source.index.fields.path}.fai' reference.fa.fai &&
-    #end if
-    ]]></token>
-
-    <macro name="reference_source_conditional">
-        <conditional name="reference_source">
-            <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below">
-                <option value="cached">Use a built-in genome index</option>
-                <option value="history">Use a genome from history and build index</option>
-            </param>
-            <when value="cached">
-                <param name="index" type="select" label="Using built-in genome" help="Select genome from the list">
-                    <options from_data_table="fasta_indexes">
-                        <filter type="sort_by" column="2" />
-                        <validator type="no_options" message="No indexes are available" />
-                    </options>
-                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
-                </param>
-            </when>
-            <when value="history">
-                <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence"
-                    help="You can upload a FASTA sequence to the history and use it as reference" />
-            </when>
-        </conditional>
-    </macro>
-</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Oct 17 17:21:17 2025 +0000
@@ -0,0 +1,57 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.2.8</token>
+    <token name="@GALAXY_VERSION@">4</token>
+
+    <!-- Requirements for lumpy_smoove (Python 2.7 environment) -->
+    <xml name="lumpy_smoove_requirements">
+        <requirements>
+            <requirement type="package" version="0.2.8">smoove</requirement>
+            <requirement type="package" version="0.7.1">svtyper</requirement>
+            <requirement type="package" version="2.7">python</requirement>
+        </requirements>
+    </xml>
+
+    <!-- Requirements for vcf2hrdetect (Python 3.9 environment) -->
+    <xml name="vcf2hrdetect_requirements">
+        <requirements>
+            <requirement type="package" version="3.9">python</requirement>
+        </requirements>
+    </xml>
+
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" level="fatal" description="Tool exception" />
+        </stdio>
+    </xml>
+    <!-- Turns on pipefail only when the running shell lists it among its options -->
+    <token name="@pipefail@"><![CDATA[set -o | grep -q pipefail && set -o pipefail;]]></token>
+    <!-- Links the reference as reference.fa with its .fai index; a failed samtools faidx stops the command chain with a non-zero status -->
+    <token name="@set_fasta_index@"><![CDATA[
+    #if str( $reference_source.reference_source_selector ) == "history":
+        ln -s '${reference_source.ref_file}' reference.fa &&
+        (samtools faidx 'reference.fa' 2>&1 || (echo "Error running samtools faidx for lumpy_smoove" >&2 && exit 1)) &&
+    #else:
+        ln -s '${reference_source.index.fields.path}' reference.fa &&
+        ln -s '${reference_source.index.fields.path}.fai' reference.fa.fai &&
+    #end if
+    ]]></token>
+    <macro name="reference_source_conditional">
+        <conditional name="reference_source">
+            <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options.">
+                <option value="cached">Use a built-in genome index</option>
+                <option value="history">Use a genome from history and build index</option>
+            </param>
+            <when value="cached">
+                <param name="index" type="select" label="Using built-in genome" help="Select genome from the list">
+                    <options from_data_table="fasta_indexes">
+                        <filter type="sort_by" column="2" />
+                        <validator type="no_options" message="No indexes are available" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" />
+            </when>
+        </conditional>
+    </macro>
+</macros>
--- a/test-data/result-1.vcf	Wed Jan 24 19:26:57 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-##fileformat=VCFv4.2
-##FILTER=<ID=PASS,Description="All filters passed">
-##fileDate=20240124
-##reference=reference.fa
-##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
-##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
-##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
-##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
-##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
-##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
-##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
-##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants">
-##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants">
-##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
-##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
-##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants">
-##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples">
-##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples">
-##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples">
-##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples">
-##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call">
-##ALT=<ID=DEL,Description="Deletion">
-##ALT=<ID=DUP,Description="Duplication">
-##ALT=<ID=INV,Description="Inversion">
-##ALT=<ID=DUP:TANDEM,Description="Tandem duplication">
-##ALT=<ID=INS,Description="Insertion of novel sequence">
-##ALT=<ID=CNV,Description="Copy number variable region">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">
-##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant">
-##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant">
-##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
-##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample">
-##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
-##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
-##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations">
-##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations">
-##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)">
-##contig=<ID=chrI_sub,length=100000>
-##smoove_version=0.2.8
-##smoove_count_stats=RG1:0,638,0,454
-##smoove_count_stats=RG2:0,466,0,362
-##source=LUMPY
-##bcftools_annotateVersion=1.17+htslib-1.17
-##bcftools_annotateCommand=annotate -x INFO/PRPOS,INFO/PREND -Ou; Date=Wed Jan 24 19:27:03 2024
-##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
-##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
-##bcftools_viewVersion=1.17+htslib-1.17
-##bcftools_viewCommand=view -c 1 -Oz -c 1 -o output-smoove.genotyped.vcf.gz; Date=Wed Jan 24 19:27:03 2024
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	RG1	RG2
-chrI_sub	50007	1	N	<DUP>	103.66	.	SVTYPE=DUP;SVLEN=1148;END=51155;STRANDS=-+:5;IMPRECISE;CIPOS=0,22;CIEND=-30,331;CIPOS95=0,10;CIEND95=-10,82;SU=5;PE=5;SR=0;AC=4;AN=4	GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB	1/1:10:74.81:-9,-3,-2:7:1:5:1:5:0:0:0:1:5:0.83	1/1:2:28.84:-3,-1,-1:5:2:2:2:2:0:0:0:2:2:0.5
--- a/test-data/result-2.vcf	Wed Jan 24 19:26:57 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-##fileformat=VCFv4.2
-##FILTER=<ID=PASS,Description="All filters passed">
-##fileDate=20240124
-##reference=reference.fa
-##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
-##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
-##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
-##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
-##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
-##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
-##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
-##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants">
-##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants">
-##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
-##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
-##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants">
-##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples">
-##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples">
-##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples">
-##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples">
-##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call">
-##ALT=<ID=DEL,Description="Deletion">
-##ALT=<ID=DUP,Description="Duplication">
-##ALT=<ID=INV,Description="Inversion">
-##ALT=<ID=DUP:TANDEM,Description="Tandem duplication">
-##ALT=<ID=INS,Description="Insertion of novel sequence">
-##ALT=<ID=CNV,Description="Copy number variable region">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">
-##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant">
-##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant">
-##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
-##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample">
-##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
-##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
-##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations">
-##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations">
-##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)">
-##contig=<ID=chrI_sub,length=100000>
-##smoove_version=0.2.8
-##smoove_count_stats=RG1:0,638,0,598
-##smoove_count_stats=RG2:0,466,0,448
-##source=LUMPY
-##bcftools_annotateVersion=1.17+htslib-1.17
-##bcftools_annotateCommand=annotate -x INFO/PRPOS,INFO/PREND -Ou; Date=Wed Jan 24 19:27:21 2024
-##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
-##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
-##bcftools_viewVersion=1.17+htslib-1.17
-##bcftools_viewCommand=view -c 1 -Oz -c 1 -o output-smoove.genotyped.vcf.gz; Date=Wed Jan 24 19:27:21 2024
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	RG1	RG2
-chrI_sub	50007	1	N	<DUP>	103.66	.	SVTYPE=DUP;SVLEN=1148;END=51155;STRANDS=-+:5;IMPRECISE;CIPOS=0,22;CIEND=-30,331;CIPOS95=0,10;CIEND95=-10,82;SU=5;PE=5;SR=0;AC=4;AN=4	GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB	1/1:10:74.81:-9,-3,-2:7:1:5:1:5:0:0:0:1:5:0.83	1/1:2:28.84:-3,-1,-1:5:2:2:2:2:0:0:0:2:2:0.5
--- a/test-data/result-3.vcf	Wed Jan 24 19:26:57 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-##fileformat=VCFv4.2
-##FILTER=<ID=PASS,Description="All filters passed">
-##fileDate=20240124
-##reference=reference.fa
-##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
-##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
-##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
-##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
-##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
-##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
-##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
-##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants">
-##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants">
-##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
-##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
-##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants">
-##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples">
-##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples">
-##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples">
-##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples">
-##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call">
-##ALT=<ID=DEL,Description="Deletion">
-##ALT=<ID=DUP,Description="Duplication">
-##ALT=<ID=INV,Description="Inversion">
-##ALT=<ID=DUP:TANDEM,Description="Tandem duplication">
-##ALT=<ID=INS,Description="Insertion of novel sequence">
-##ALT=<ID=CNV,Description="Copy number variable region">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">
-##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant">
-##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant">
-##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
-##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample">
-##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
-##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
-##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations">
-##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations">
-##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)">
-##contig=<ID=chrI_sub,length=100000>
-##smoove_version=0.2.8
-##smoove_count_stats=RG2:0,466,0,448
-##smoove_count_stats=RG1:0,638,0,598
-##source=LUMPY
-##bcftools_annotateVersion=1.17+htslib-1.17
-##bcftools_annotateCommand=annotate -x INFO/PRPOS,INFO/PREND -Ou; Date=Wed Jan 24 19:27:39 2024
-##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
-##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
-##bcftools_viewVersion=1.17+htslib-1.17
-##bcftools_viewCommand=view -c 1 -Oz -c 1 -o output-smoove.genotyped.vcf.gz; Date=Wed Jan 24 19:27:39 2024
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	RG2	RG1
-chrI_sub	50007	1	N	<DUP>	103.66	.	SVTYPE=DUP;SVLEN=1148;END=51155;STRANDS=-+:5;IMPRECISE;CIPOS=0,22;CIEND=-30,331;CIPOS95=0,10;CIEND95=-10,82;SU=5;PE=5;SR=0;AC=4;AN=4	GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB	1/1:2:28.84:-3,-1,-1:5:2:2:2:2:0:0:0:2:2:0.5	1/1:10:74.81:-9,-3,-2:7:1:5:1:5:0:0:0:1:5:0.83
--- a/test-data/result-4.vcf	Wed Jan 24 19:26:57 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-##fileformat=VCFv4.2
-##FILTER=<ID=PASS,Description="All filters passed">
-##fileDate=20240124
-##reference=reference.fa
-##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
-##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
-##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
-##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
-##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
-##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
-##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
-##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants">
-##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants">
-##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
-##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
-##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants">
-##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples">
-##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples">
-##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples">
-##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples">
-##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call">
-##INFO=<ID=PRPOS,Number=.,Type=String,Description="LUMPY probability curve of the POS breakend">
-##INFO=<ID=PREND,Number=.,Type=String,Description="LUMPY probability curve of the END breakend">
-##ALT=<ID=DEL,Description="Deletion">
-##ALT=<ID=DUP,Description="Duplication">
-##ALT=<ID=INV,Description="Inversion">
-##ALT=<ID=DUP:TANDEM,Description="Tandem duplication">
-##ALT=<ID=INS,Description="Insertion of novel sequence">
-##ALT=<ID=CNV,Description="Copy number variable region">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">
-##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant">
-##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant">
-##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
-##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample">
-##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
-##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
-##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations">
-##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations">
-##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)">
-##contig=<ID=chrI_sub,length=100000>
-##smoove_version=0.2.8
-##smoove_count_stats=RG1:0,638,0,598
-##smoove_count_stats=RG2:0,466,0,448
-##source=LUMPY
-##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
-##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
-##bcftools_viewVersion=1.17+htslib-1.17
-##bcftools_viewCommand=view -O z -c 1 -o output-smoove.genotyped.vcf.gz; Date=Wed Jan 24 19:27:57 2024
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	RG1	RG2
-chrI_sub	50007	1	N	<DUP>	103.66	.	SVTYPE=DUP;SVLEN=1148;END=51155;STRANDS=-+:5;IMPRECISE;CIPOS=0,22;CIEND=-30,331;CIPOS95=0,10;CIEND95=-10,82;SU=5;PE=5;SR=0;PRPOS=0.258039,0.191959,0.142119,0.105597,0.0777833,0.0578014,0.0427846,0.032024,0.023764,0.0176738,0.0130831,0.00982276,0.00731486,0.00542727,0.00399758,0.00297292,0.00221878,0.00165017,0.00122989,0.000915475,0.000674295,0.000501964,0.000370575;PREND=5.85377e-06,7.73803e-06,1.03845e-05,1.38061e-05,1.83274e-05,2.4271e-05,3.24374e-05,4.32758e-05,5.77075e-05,7.68205e-05,0.000102643,0.000137372,0.000184925,0.00024509,0.000329301,0.000440606,0.000583882,0.000773449,0.00103874,0.00137293,0.0018211,0.00244098,0.0032246,0.00433051,0.00577701,0.00771097,0.0100947,0.0134638,0.017428,0.023191,0.0297785,0.0287776,0.0271433,0.0260442,0.0251005,0.0239221,0.0226973,0.022123,0.0207745,0.0198427,0.0190339,0.0181385,0.0179065,0.0174155,0.0168898,0.0165094,0.0157197,0.015398,0.0149506,0.0147207,0.014548,0.0142966,0.0140289,0.0136684,0.0132831,0.013124,0.0126578,0.0124307,0.0121283,0.0114912,0.0113024,0.0110801,0.0107855,0.0107408,0.0105719,0.010252,0.0100385,0.00989579,0.00964551,0.00952156,0.00917511,0.00901622,0.0089231,0.00872881,0.00852142,0.00828831,0.00796058,0.00770706,0.00751548,0.00708522,0.00687959,0.00654007,0.0063474,0.00626172,0.00607514,0.00598551,0.00572162,0.00547532,0.00517031,0.00507122,0.00476895,0.00459012,0.00440836,0.00415379,0.00393712,0.00382996,0.00364121,0.00353575,0.00342231,0.00315652,0.00304452,0.00290783,0.00272193,0.00266152,0.00248906,0.00233477,0.00230558,0.00218745,0.00210921,0.00208486,0.00195023,0.00191344,0.00188952,0.00177463,0.00176537,0.00165498,0.00159982,0.00154295,0.00152936,0.00151581,0.00148667,0.00136872,0.00133499,0.00122388,0.00117244,0.00114374,0.00109766,0.00105654,0.00102599,0.000908013,0.00089917,0.000841279,0.000780186,0.000748484,0.000704024,0.000674257,0.000641784,0.000584102,0.000563501,0.000540792,0.000519593,0.000507932,0.000498128,0.000471617,0.000459362,0.000446368,0.00
0438032,0.000435269,0.000425232,0.000401305,0.00038503,0.000357298,0.000347914,0.000330873,0.000326613,0.000317727,0.00029779,0.000277006,0.000261571,0.000246701,0.00023847,0.000227691,0.000216491,0.000207228,0.000194862,0.000188863,0.000183536,0.000181414,0.000168119,0.000161592,0.000152474,0.000150676,0.00014958,0.000147849,0.00014255,0.000141498,0.000136699,0.000135359,0.000130407,0.000124551,0.000118969,0.000112593,0.000109014,0.000105885,0.000101976,0.000101471,9.81661e-05,9.4903e-05,9.33805e-05,8.92603e-05,8.5718e-05,8.43405e-05,8.43405e-05,7.83631e-05,7.77206e-05,7.24593e-05,7.20819e-05,6.92001e-05,6.88047e-05,6.45869e-05,5.93805e-05,5.39222e-05,5.04172e-05,4.97203e-05,4.78039e-05,4.40517e-05,4.07077e-05,3.76779e-05,3.26187e-05,3.09886e-05,2.94952e-05,2.81091e-05,2.67635e-05,2.55503e-05,2.44152e-05,2.44152e-05,2.31509e-05,2.20697e-05,2.16647e-05,1.9562e-05,1.89572e-05,1.73377e-05,1.68284e-05,1.56743e-05,1.54104e-05,1.44891e-05,1.44455e-05,1.24429e-05,1.21889e-05,1.15144e-05,1.1345e-05,1.06711e-05,1.05578e-05,1.04779e-05,9.85417e-06,8.85111e-06,7.89968e-06,7.4231e-06,6.88815e-06,6.86571e-06,6.76755e-06,6.74529e-06,6.38168e-06,5.57945e-06,5.28579e-06,4.95863e-06,4.82984e-06,4.81336e-06,4.77422e-06,4.66379e-06,4.59303e-06,4.28648e-06,4.25661e-06,4.19686e-06,3.40704e-06,3.37014e-06,2.74502e-06,2.73489e-06,2.53014e-06,2.51133e-06,2.27525e-06,2.24712e-06,1.83448e-06,1.65944e-06,1.65308e-06,1.37572e-06,1.25623e-06,1.13226e-06,1.1278e-06,1.11313e-06,9.94694e-07,8.85533e-07,7.94148e-07,7.25022e-07,6.40991e-07,6.40991e-07,5.66608e-07,5.66608e-07,5.04999e-07,4.97883e-07,4.858e-07,4.858e-07,4.3091e-07,4.10457e-07,4.04316e-07,3.96503e-07,3.90456e-07,3.41062e-07,3.34301e-07,2.87897e-07,2.86605e-07,2.78756e-07,2.71737e-07,2.66751e-07,2.5482e-07,2.2371e-07,2.19872e-07,2.12274e-07,2.12274e-07,2.11258e-07,2.08515e-07,2.07512e-07,2.03502e-07,2.025e-07,2.01497e-07,1.7167e-07,1.70816e-07,1.44698e-07,1.43971e-07,1.02317e-07,1.02317e-07,7.27124e-08,7.0605e-08,5.82493e-08,5.76361e-0
8,5.76361e-08,5.59409e-08,3.89337e-08,3.87244e-08,3.79226e-08,3.77165e-08,3.59485e-08,2.87588e-08,2.79782e-08,2.16365e-08,2.16365e-08,2.11459e-08,2.05441e-08,1.99506e-08,1.93652e-08,1.54922e-08,1.54032e-08,1.52251e-08,1.48557e-08,1.45936e-08,1.45062e-08,1.44188e-08,1.43314e-08,1.43314e-08,1.43314e-08,6.94477e-09,6.67767e-09,3.21146e-09,3.14592e-09,3.12614e-09,3.04164e-09,3.04164e-09,3.04164e-09,1.46002e-09,1.40973e-09,1.40046e-09,1.40046e-09,1.36027e-09,1.32049e-09,1.27247e-09,6.36233e-10,6.31905e-10,3.0946e-10,2.9998e-10,2.97867e-10,2.90602e-10,2.90602e-10;AC=4;AN=4	GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB	1/1:10:74.81:-9,-3,-2:7:1:5:1:5:0:0:0:1:5:0.83	1/1:2:28.84:-3,-1,-1:5:2:2:2:2:0:0:0:2:2:0.5
--- a/test-data/result-5.vcf	Wed Jan 24 19:26:57 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-##fileformat=VCFv4.2
-##FILTER=<ID=PASS,Description="All filters passed">
-##fileDate=20240124
-##reference=reference.fa
-##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
-##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
-##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
-##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
-##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
-##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
-##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
-##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants">
-##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants">
-##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
-##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
-##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants">
-##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples">
-##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples">
-##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples">
-##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples">
-##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call">
-##ALT=<ID=DEL,Description="Deletion">
-##ALT=<ID=DUP,Description="Duplication">
-##ALT=<ID=INV,Description="Inversion">
-##ALT=<ID=DUP:TANDEM,Description="Tandem duplication">
-##ALT=<ID=INS,Description="Insertion of novel sequence">
-##ALT=<ID=CNV,Description="Copy number variable region">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">
-##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant">
-##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant">
-##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
-##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample">
-##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
-##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
-##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations">
-##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations">
-##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)">
-##contig=<ID=chrI_sub,length=100000>
-##smoove_version=0.2.8
-##smoove_count_stats=RG1:0,638,0,598
-##source=LUMPY
-##bcftools_annotateVersion=1.17+htslib-1.17
-##bcftools_annotateCommand=annotate -x INFO/PRPOS,INFO/PREND -Ou; Date=Wed Jan 24 19:28:14 2024
-##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
-##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
-##bcftools_viewVersion=1.17+htslib-1.17
-##bcftools_viewCommand=view -c 1 -Oz -c 1 -o output-smoove.genotyped.vcf.gz; Date=Wed Jan 24 19:28:14 2024
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	RG1
-chrI_sub	50000	1	N	<DUP>	71.54	.	SVTYPE=DUP;SVLEN=981;END=50981;STRANDS=-+:4;IMPRECISE;CIPOS=-646,29;CIEND=-30,505;CIPOS95=-164,8;CIEND95=-9,133;SU=4;PE=4;SR=0;AC=2;AN=2	GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB	1/1:8:71.54:-8,-2,-1:9:3:5:3:5:0:0:0:3:5:0.62
--- a/test-data/result-6.vcf	Wed Jan 24 19:26:57 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-##fileformat=VCFv4.2
-##FILTER=<ID=PASS,Description="All filters passed">
-##fileDate=20240124
-##reference=reference.fa
-##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
-##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
-##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
-##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
-##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
-##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
-##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
-##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants">
-##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants">
-##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
-##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
-##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants">
-##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples">
-##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples">
-##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples">
-##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples">
-##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call">
-##ALT=<ID=DEL,Description="Deletion">
-##ALT=<ID=DUP,Description="Duplication">
-##ALT=<ID=INV,Description="Inversion">
-##ALT=<ID=DUP:TANDEM,Description="Tandem duplication">
-##ALT=<ID=INS,Description="Insertion of novel sequence">
-##ALT=<ID=CNV,Description="Copy number variable region">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">
-##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant">
-##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant">
-##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
-##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample">
-##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
-##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
-##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations">
-##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations">
-##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally">
-##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)">
-##contig=<ID=chrI_sub,length=100000>
-##smoove_version=0.2.8
-##smoove_count_stats=RG1:0,638,0,454
-##smoove_count_stats=RG2:0,466,0,362
-##source=LUMPY
-##bcftools_annotateVersion=1.17+htslib-1.17
-##bcftools_annotateCommand=annotate -x INFO/PRPOS,INFO/PREND -Ou; Date=Wed Jan 24 19:26:45 2024
-##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
-##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
-##bcftools_viewVersion=1.17+htslib-1.17
-##bcftools_viewCommand=view -c 1 -Oz -c 1 -o output-smoove.genotyped.vcf.gz; Date=Wed Jan 24 19:26:45 2024
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	RG1	RG2
-chrI_sub	50007	1	N	<DUP>	103.66	.	SVTYPE=DUP;SVLEN=1148;END=51155;STRANDS=-+:5;IMPRECISE;CIPOS=0,22;CIEND=-30,331;CIPOS95=0,10;CIEND95=-10,82;SU=5;PE=5;SR=0;AC=4;AN=4	GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB	1/1:10:74.81:-9,-3,-2:7:1:5:1:5:0:0:0:1:5:0.83	1/1:2:28.84:-3,-1,-1:5:2:2:2:2:0:0:0:2:2:0.5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/result_cohort.vcf	Fri Oct 17 17:21:17 2025 +0000
@@ -0,0 +1,59 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##fileDate=20251017
+##reference=reference.fa
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
+##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants">
+##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants">
+##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
+##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
+##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants">
+##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples">
+##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples">
+##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples">
+##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples">
+##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call">
+##INFO=<ID=PRPOS,Number=.,Type=String,Description="LUMPY probability curve of the POS breakend">
+##INFO=<ID=PREND,Number=.,Type=String,Description="LUMPY probability curve of the END breakend">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=DUP:TANDEM,Description="Tandem duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">
+##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant">
+##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant">
+##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
+##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
+##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
+##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations">
+##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations">
+##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)">
+##contig=<ID=chrI_sub,length=100000>
+##smoove_version=0.2.8
+##smoove_count_stats=RG1:0,638,0,598
+##smoove_count_stats=RG2:0,466,0,448
+##source=LUMPY
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##bcftools_viewVersion=1.17+htslib-1.17
+##bcftools_viewCommand=view -O z -c 1 -o output-smoove.genotyped.vcf.gz; Date=Fri Oct 17 17:45:07 2025
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	RG1	RG2
+chrI_sub	50007	1	N	<DUP>	103.66	.	SVTYPE=DUP;SVLEN=1148;END=51155;STRANDS=-+:5;IMPRECISE;CIPOS=0,22;CIEND=-30,331;CIPOS95=0,10;CIEND95=-10,82;SU=5;PE=5;SR=0;PRPOS=0.258039,0.191959,0.142119,0.105597,0.0777833,0.0578014,0.0427846,0.032024,0.023764,0.0176738,0.0130831,0.00982276,0.00731486,0.00542727,0.00399758,0.00297292,0.00221878,0.00165017,0.00122989,0.000915475,0.000674295,0.000501964,0.000370575;PREND=5.85377e-06,7.73803e-06,1.03845e-05,1.38061e-05,1.83274e-05,2.4271e-05,3.24374e-05,4.32758e-05,5.77075e-05,7.68205e-05,0.000102643,0.000137372,0.000184925,0.00024509,0.000329301,0.000440606,0.000583882,0.000773449,0.00103874,0.00137293,0.0018211,0.00244098,0.0032246,0.00433051,0.00577701,0.00771097,0.0100947,0.0134638,0.017428,0.023191,0.0297785,0.0287776,0.0271433,0.0260442,0.0251005,0.0239221,0.0226973,0.022123,0.0207745,0.0198427,0.0190339,0.0181385,0.0179065,0.0174155,0.0168898,0.0165094,0.0157197,0.015398,0.0149506,0.0147207,0.014548,0.0142966,0.0140289,0.0136684,0.0132831,0.013124,0.0126578,0.0124307,0.0121283,0.0114912,0.0113024,0.0110801,0.0107855,0.0107408,0.0105719,0.010252,0.0100385,0.00989579,0.00964551,0.00952156,0.00917511,0.00901622,0.0089231,0.00872881,0.00852142,0.00828831,0.00796058,0.00770706,0.00751548,0.00708522,0.00687959,0.00654007,0.0063474,0.00626172,0.00607514,0.00598551,0.00572162,0.00547532,0.00517031,0.00507122,0.00476895,0.00459012,0.00440836,0.00415379,0.00393712,0.00382996,0.00364121,0.00353575,0.00342231,0.00315652,0.00304452,0.00290783,0.00272193,0.00266152,0.00248906,0.00233477,0.00230558,0.00218745,0.00210921,0.00208486,0.00195023,0.00191344,0.00188952,0.00177463,0.00176537,0.00165498,0.00159982,0.00154295,0.00152936,0.00151581,0.00148667,0.00136872,0.00133499,0.00122388,0.00117244,0.00114374,0.00109766,0.00105654,0.00102599,0.000908013,0.00089917,0.000841279,0.000780186,0.000748484,0.000704024,0.000674257,0.000641784,0.000584102,0.000563501,0.000540792,0.000519593,0.000507932,0.000498128,0.000471617,0.000459362,0.000446368,0.00
0438032,0.000435269,0.000425232,0.000401305,0.00038503,0.000357298,0.000347914,0.000330873,0.000326613,0.000317727,0.00029779,0.000277006,0.000261571,0.000246701,0.00023847,0.000227691,0.000216491,0.000207228,0.000194862,0.000188863,0.000183536,0.000181414,0.000168119,0.000161592,0.000152474,0.000150676,0.00014958,0.000147849,0.00014255,0.000141498,0.000136699,0.000135359,0.000130407,0.000124551,0.000118969,0.000112593,0.000109014,0.000105885,0.000101976,0.000101471,9.81661e-05,9.4903e-05,9.33805e-05,8.92603e-05,8.5718e-05,8.43405e-05,8.43405e-05,7.83631e-05,7.77206e-05,7.24593e-05,7.20819e-05,6.92001e-05,6.88047e-05,6.45869e-05,5.93805e-05,5.39222e-05,5.04172e-05,4.97203e-05,4.78039e-05,4.40517e-05,4.07077e-05,3.76779e-05,3.26187e-05,3.09886e-05,2.94952e-05,2.81091e-05,2.67635e-05,2.55503e-05,2.44152e-05,2.44152e-05,2.31509e-05,2.20697e-05,2.16647e-05,1.9562e-05,1.89572e-05,1.73377e-05,1.68284e-05,1.56743e-05,1.54104e-05,1.44891e-05,1.44455e-05,1.24429e-05,1.21889e-05,1.15144e-05,1.1345e-05,1.06711e-05,1.05578e-05,1.04779e-05,9.85417e-06,8.85111e-06,7.89968e-06,7.4231e-06,6.88815e-06,6.86571e-06,6.76755e-06,6.74529e-06,6.38168e-06,5.57945e-06,5.28579e-06,4.95863e-06,4.82984e-06,4.81336e-06,4.77422e-06,4.66379e-06,4.59303e-06,4.28648e-06,4.25661e-06,4.19686e-06,3.40704e-06,3.37014e-06,2.74502e-06,2.73489e-06,2.53014e-06,2.51133e-06,2.27525e-06,2.24712e-06,1.83448e-06,1.65944e-06,1.65308e-06,1.37572e-06,1.25623e-06,1.13226e-06,1.1278e-06,1.11313e-06,9.94694e-07,8.85533e-07,7.94148e-07,7.25022e-07,6.40991e-07,6.40991e-07,5.66608e-07,5.66608e-07,5.04999e-07,4.97883e-07,4.858e-07,4.858e-07,4.3091e-07,4.10457e-07,4.04316e-07,3.96503e-07,3.90456e-07,3.41062e-07,3.34301e-07,2.87897e-07,2.86605e-07,2.78756e-07,2.71737e-07,2.66751e-07,2.5482e-07,2.2371e-07,2.19872e-07,2.12274e-07,2.12274e-07,2.11258e-07,2.08515e-07,2.07512e-07,2.03502e-07,2.025e-07,2.01497e-07,1.7167e-07,1.70816e-07,1.44698e-07,1.43971e-07,1.02317e-07,1.02317e-07,7.27124e-08,7.0605e-08,5.82493e-08,5.76361e-0
8,5.76361e-08,5.59409e-08,3.89337e-08,3.87244e-08,3.79226e-08,3.77165e-08,3.59485e-08,2.87588e-08,2.79782e-08,2.16365e-08,2.16365e-08,2.11459e-08,2.05441e-08,1.99506e-08,1.93652e-08,1.54922e-08,1.54032e-08,1.52251e-08,1.48557e-08,1.45936e-08,1.45062e-08,1.44188e-08,1.43314e-08,1.43314e-08,1.43314e-08,6.94477e-09,6.67767e-09,3.21146e-09,3.14592e-09,3.12614e-09,3.04164e-09,3.04164e-09,3.04164e-09,1.46002e-09,1.40973e-09,1.40046e-09,1.40046e-09,1.36027e-09,1.32049e-09,1.27247e-09,6.36233e-10,6.31905e-10,3.0946e-10,2.9998e-10,2.97867e-10,2.90602e-10,2.90602e-10;AC=4;AN=4	GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB	1/1:10:74.81:-9,-3,-2:7:1:5:1:5:0:0:0:1:5:0.83	1/1:2:28.84:-3,-1,-1:5:2:2:2:2:0:0:0:2:2:0.5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/result_paired.vcf	Fri Oct 17 17:21:17 2025 +0000
@@ -0,0 +1,59 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##fileDate=20251017
+##reference=reference.fa
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
+##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants">
+##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants">
+##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
+##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
+##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants">
+##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples">
+##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples">
+##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples">
+##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples">
+##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=DUP:TANDEM,Description="Tandem duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">
+##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant">
+##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant">
+##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
+##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
+##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
+##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations">
+##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations">
+##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)">
+##contig=<ID=chrI_sub,length=100000>
+##smoove_version=0.2.8
+##smoove_count_stats=RG1:0,638,0,454
+##smoove_count_stats=RG2:0,466,0,362
+##source=LUMPY
+##bcftools_annotateVersion=1.17+htslib-1.17
+##bcftools_annotateCommand=annotate -x INFO/PRPOS,INFO/PREND -Ou; Date=Fri Oct 17 17:44:56 2025
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##bcftools_viewVersion=1.17+htslib-1.17
+##bcftools_viewCommand=view -c 1 -Oz -c 1 -o output-smoove.genotyped.vcf.gz; Date=Fri Oct 17 17:44:56 2025
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	RG1	RG2
+chrI_sub	50007	1	N	<DUP>	103.66	.	SVTYPE=DUP;SVLEN=1148;END=51155;STRANDS=-+:5;IMPRECISE;CIPOS=0,22;CIEND=-30,331;CIPOS95=0,10;CIEND95=-10,82;SU=5;PE=5;SR=0;AC=4;AN=4	GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB	1/1:10:74.81:-9,-3,-2:7:1:5:1:5:0:0:0:1:5:0.83	1/1:2:28.84:-3,-1,-1:5:2:2:2:2:0:0:0:2:2:0.5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/result_single.vcf	Fri Oct 17 17:21:17 2025 +0000
@@ -0,0 +1,58 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##fileDate=20251017
+##reference=reference.fa
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
+##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants">
+##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants">
+##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
+##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
+##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants">
+##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples">
+##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples">
+##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples">
+##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples">
+##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call">
+##INFO=<ID=PRPOS,Number=.,Type=String,Description="LUMPY probability curve of the POS breakend">
+##INFO=<ID=PREND,Number=.,Type=String,Description="LUMPY probability curve of the END breakend">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=DUP:TANDEM,Description="Tandem duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">
+##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant">
+##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant">
+##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
+##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
+##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
+##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations">
+##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations">
+##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)">
+##contig=<ID=chrI_sub,length=100000>
+##smoove_version=0.2.8
+##smoove_count_stats=RG1:0,638,0,598
+##source=LUMPY
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##bcftools_viewVersion=1.17+htslib-1.17
+##bcftools_viewCommand=view -O z -c 1 -o output-smoove.genotyped.vcf.gz; Date=Fri Oct 17 17:45:18 2025
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	RG1
+chrI_sub	50000	1	N	<DUP>	71.54	.	SVTYPE=DUP;SVLEN=981;END=50981;STRANDS=-+:4;IMPRECISE;CIPOS=-646,29;CIEND=-30,505;CIPOS95=-164,8;CIEND95=-9,133;SU=4;PE=4;SR=0;PRPOS=6.63313e-13,6.63313e-13,9.28639e-13,3.71455e-12,3.71455e-12,3.71455e-12,4.2452e-12,4.2452e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,1.32663e-11,1.32663e-11,2.65325e-11,2.98491e-11,2.98491e-11,2.98491e-11,2.98491e-11,2.98491e-11,2.98491e-11,2.98491e-11,2.98491e-11,2.98491e-11,2.98491e-11,5.85042e-11,5.85042e-11,5.85042e-11,6.50047e-11,8.49041e-11,8.49041e-11,8.49041e-11,1.16743e-10,1.16743e-10,1.16743e-10,1.16743e-10,1.16743e-10,1.27356e-10,1.27356e-10,1.37969e-10,1.37969e-10,1.37969e-10,1.37969e-10,1.37969e-10,1.48582e-10,2.08015e-10,2.08015e-10,2.63269e-10,3.19684e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.86844e-10,4.77586e-10,4.77586e-10,4.77586e-10,4.77586e-10,5.77878e-10,5.77878e-10,6.09983e-10,7.22348e-10,7.58466e-10,9.02637e-10,9.02637e-10,1.05934e-09,1.05934e-09,1.05934e-09,1.05934e-09,1.10979e-09,1.28709e-09,1.28709e-09,1.28709e-09,1.3456e-09,2.20452e-09,2.20452e-09,2.20452e-09,2.39622e-09,2.63584e-09,2.63584e-09,2.63584e-09,2.63584e-09,2.63584e-09,2.87546e-09,2.87546e-09,3.49234e-09,3.49234e-09,3.63204e-09,3.77173e-09,3.77173e-09,4.2123e-09,4.36274e-09,5.02858e-09,5.57183e-09,7.45929e-09,7.45929e-09,7.95657e-09,8.20522e-09,8.20522e-09,8.45386e-09,8.45386e-09,9.27816e-09,9.27816e-09,9.82393e-09,9.82393e-09,1.13689e-08,1.13689e-08,1.13689e-08,1.13689e-08,1.34321e-08,1.34321e-08,1.34321e-08,1.41783e-08,1.49245e-08,1.61061e-08,1.65414e-08,1.65414e-08,1.69767e-08,1.7412e-08,1.88328e-08,2.03093e-08,2.12764e-08,2.28817e-08,2.45452e-08,2.62672e-08,2.74612e-08,2.74612e-08,2.74612e-08,3.20258e-08,3.70203e-08,3.79233e-08,4.02565e-08,4.02565e-08,4.02565e-08,4.1215e-08,4.7281e-08,4.83555e-08,4.83555e-08,4.83
555e-08,5.14073e-08,5.4545e-08,5.4545e-08,5.65652e-08,5.98319e-08,6.18951e-08,6.31846e-08,6.81057e-08,7.47103e-08,8.12235e-08,8.82283e-08,9.28108e-08,9.56232e-08,9.56232e-08,9.56232e-08,9.56232e-08,1.03158e-07,1.03158e-07,1.03158e-07,1.03158e-07,1.05069e-07,1.12395e-07,1.14402e-07,1.22157e-07,1.22157e-07,1.26369e-07,1.28475e-07,1.38407e-07,1.56156e-07,1.63334e-07,1.70252e-07,1.74617e-07,1.85271e-07,1.99275e-07,1.99275e-07,1.99275e-07,2.17015e-07,2.20071e-07,2.29337e-07,2.38793e-07,2.58278e-07,2.75015e-07,2.82655e-07,2.89549e-07,2.97374e-07,2.97374e-07,2.97374e-07,3.04455e-07,3.23402e-07,3.35104e-07,3.35104e-07,3.35104e-07,3.64585e-07,3.91068e-07,4.10224e-07,4.40476e-07,4.49847e-07,4.87137e-07,5.26094e-07,5.36831e-07,6.28698e-07,6.47988e-07,6.68396e-07,6.75822e-07,7.04434e-07,7.55902e-07,7.80286e-07,7.88414e-07,8.75731e-07,9.19142e-07,9.28426e-07,9.46995e-07,9.64532e-07,9.91697e-07,1.06626e-06,1.0955e-06,1.15552e-06,1.20686e-06,1.26203e-06,1.27371e-06,1.34862e-06,1.38434e-06,1.41875e-06,1.43153e-06,1.54371e-06,1.63101e-06,1.68683e-06,1.83001e-06,1.92736e-06,2.03583e-06,2.13276e-06,2.13276e-06,2.23194e-06,2.28238e-06,2.34578e-06,2.3982e-06,2.51744e-06,2.57247e-06,2.76434e-06,2.84595e-06,2.89338e-06,3.07861e-06,3.14112e-06,3.32116e-06,3.40742e-06,3.51855e-06,3.61953e-06,3.82682e-06,3.82682e-06,3.97829e-06,4.3079e-06,4.42232e-06,4.64309e-06,4.73138e-06,4.8178e-06,4.95949e-06,5.14289e-06,5.23818e-06,5.54465e-06,5.78622e-06,5.93058e-06,6.35481e-06,6.46518e-06,6.59449e-06,6.68869e-06,6.8494e-06,6.99209e-06,7.25209e-06,7.25209e-06,7.46728e-06,7.91281e-06,8.22498e-06,8.57942e-06,8.83103e-06,8.91132e-06,9.34443e-06,9.42786e-06,9.54497e-06,1.0001e-05,1.01503e-05,1.03626e-05,1.0708e-05,1.07721e-05,1.10062e-05,1.12436e-05,1.14173e-05,1.14844e-05,1.14844e-05,1.17286e-05,1.20455e-05,1.23309e-05,1.28821e-05,1.3366e-05,1.38719e-05,1.40722e-05,1.43823e-05,1.49633e-05,1.53878e-05,1.64742e-05,1.7479e-05,1.76711e-05,1.8827e-05,1.94225e-05,2.01862e-05,2.03427e-05,2.11326e-05,2.11326e-05,
2.13976e-05,2.1561e-05,2.2619e-05,2.33086e-05,2.39458e-05,2.48369e-05,2.53281e-05,2.59478e-05,2.65228e-05,2.7221e-05,2.77425e-05,2.87972e-05,2.98736e-05,3.05204e-05,3.20095e-05,3.25818e-05,3.33799e-05,3.39248e-05,3.48474e-05,3.53405e-05,3.59666e-05,3.6784e-05,3.75874e-05,3.92024e-05,4.0241e-05,4.20038e-05,4.43531e-05,4.52941e-05,4.66503e-05,4.73292e-05,4.80349e-05,4.87269e-05,4.87269e-05,4.90119e-05,5.11599e-05,5.24451e-05,5.34086e-05,5.47146e-05,5.55009e-05,5.69811e-05,5.84684e-05,5.93665e-05,6.02015e-05,6.14814e-05,6.40561e-05,6.6295e-05,6.85631e-05,6.98671e-05,7.051e-05,7.17372e-05,7.47684e-05,7.68994e-05,7.71907e-05,7.77733e-05,8.10807e-05,8.48277e-05,8.70145e-05,8.93699e-05,9.4566e-05,9.72675e-05,0.00010155,0.000104033,0.000105817,0.000108912,0.000110757,0.000112617,0.000115108,0.000117426,0.000120796,0.000124217,0.000127035,0.000133958,0.000135284,0.00013771,0.000141465,0.000143421,0.000149375,0.000152392,0.000158742,0.000161697,0.000165596,0.000170667,0.000175111,0.000177145,0.000180153,0.000183445,0.000187346,0.000191523,0.000197398,0.000203528,0.000208594,0.000218837,0.000223945,0.000229382,0.000233277,0.000237934,0.000245812,0.000251642,0.000263897,0.000269275,0.000272132,0.000280268,0.000283994,0.000288584,0.000297088,0.000306919,0.000318725,0.00032372,0.000330515,0.000334746,0.000339348,0.000348665,0.000356201,0.000364227,0.000372055,0.000377991,0.000390787,0.000398994,0.000414013,0.000420426,0.000427645,0.00044589,0.000453498,0.000463816,0.000479121,0.00048951,0.000501275,0.000510841,0.00051379,0.00052265,0.00053549,0.000541968,0.000544993,0.000560474,0.000573072,0.000579449,0.000599811,0.000611191,0.000632963,0.000652784,0.000674123,0.000695846,0.00070158,0.000703662,0.000726154,0.000739201,0.000763046,0.000779934,0.000797922,0.000816787,0.000835967,0.000851666,0.00086839,0.00089212,0.00090997,0.000930854,0.000951251,0.000962474,0.000977879,0.00100665,0.00102443,0.00103429,0.00106096,0.00106526,0.00108546,0.00110102,0.00112637,0.00113921,0.00118109,0.0
0121094,0.00124954,0.00128687,0.00130263,0.00134446,0.00135993,0.00137618,0.00140981,0.00144243,0.00149414,0.00154698,0.00156333,0.00158274,0.00160802,0.00164882,0.00167303,0.0017254,0.001761,0.0017684,0.00182601,0.00186751,0.00188394,0.0019363,0.00196713,0.00200835,0.00203755,0.00206133,0.00211151,0.00213421,0.00220223,0.00224795,0.00226556,0.00231049,0.00238568,0.00244674,0.00246971,0.00251416,0.00253874,0.00255714,0.00261604,0.0026918,0.00271102,0.00276336,0.00279437,0.00285041,0.00289274,0.00294048,0.0029948,0.00306635,0.0031464,0.00319082,0.0032411,0.00331793,0.00337316,0.00342132,0.00347983,0.00354716,0.00356508,0.00361396,0.00370163,0.00373695,0.00374977,0.0037958,0.00384122,0.00389989,0.00401315,0.00411437,0.00419071,0.00425638,0.00431481,0.00435195,0.0044396,0.00452008,0.00457854,0.00462369,0.00470661,0.00480849,0.00487013,0.00494259,0.0050318,0.00506453,0.00514111,0.00521677,0.00528543,0.00536058,0.00546628,0.00556218,0.00569322,0.00578077,0.00584297,0.00592578,0.00605669,0.00608542,0.00612454,0.00617244,0.00623092,0.00634898,0.00647577,0.00663329,0.0067718,0.00685291,0.00695353,0.00702677,0.00713294,0.00729075,0.00737829,0.00747472,0.00761973,0.00771011,0.00779947,0.00787921,0.00800693,0.00801747,0.00820048,0.00824719,0.00828323,0.00842485,0.008535,0.00871719,0.00888922,0.0090021,0.00911408,0.00915106,0.00926424,0.00938023,0.00943336,0.00949674,0.00960008,0.00968815,0.00980632,0.00992631,0.0101043,0.0102097,0.0104223,0.0105457,0.0105742,0.0107447,0.0108014,0.0110009,0.0111191,0.0112494,0.0113544,0.0115402,0.0116588,0.0117334,0.0118262,0.0119305,0.012018,0.0121724,0.012295,0.0125323,0.0127161,0.012828,0.0130123,0.0131287,0.0133421,0.0135318,0.00997979,0.00742702,0.00552616,0.00412186,0.00306302,0.00227049,0.00167883,0.0012489,0.000924642,0.000687025,0.000506065,0.000376061,0.00027836,0.000208351,0.000154611,0.000114988,8.51197e-05,6.39078e-05,4.75912e-05,3.53103e-05,2.60086e-05,1.93421e-05,1.44356e-05,1.07361e-05,8.00181e-06,5.95616e-06,4.38703e-06,3.26583
e-06,2.41099e-06;PREND=3.03189e-06,4.08075e-06,5.42715e-06,7.2752e-06,9.78941e-06,1.31464e-05,1.76564e-05,2.36589e-05,3.17685e-05,4.27446e-05,5.71532e-05,7.68831e-05,0.000103212,0.000136802,0.000184779,0.000246446,0.000330777,0.000443821,0.000589039,0.000780171,0.00103289,0.0013799,0.001855,0.00248779,0.00335196,0.00440533,0.00584237,0.0077623,0.0102619,0.0138515,0.0184346,0.0180755,0.0178232,0.0174946,0.0171445,0.0169641,0.0165645,0.0162885,0.0160958,0.0158644,0.0156359,0.0154264,0.0151264,0.01483,0.014592,0.014239,0.0139718,0.0137623,0.0134515,0.0131013,0.0128354,0.012673,0.0124121,0.0123026,0.0120969,0.0119128,0.0116482,0.0114645,0.0110884,0.0108412,0.0105261,0.0103996,0.0102712,0.0100908,0.00985027,0.00964829,0.0094525,0.00930827,0.00914226,0.00896594,0.00882723,0.00860671,0.00847046,0.00835048,0.00814898,0.0079474,0.00783296,0.00762806,0.00749484,0.00739587,0.00720925,0.00711508,0.00696391,0.00675945,0.00666688,0.00653664,0.00642077,0.00632301,0.00620215,0.00613777,0.00602424,0.00592835,0.00579545,0.00566521,0.00559181,0.00549276,0.00546682,0.00538658,0.00521263,0.00507089,0.00487481,0.00475571,0.00469295,0.00458368,0.00452003,0.00439439,0.0042222,0.00411819,0.00401878,0.00394121,0.0038585,0.00376369,0.00367385,0.00358379,0.00350216,0.00344247,0.00336339,0.00330029,0.00322828,0.00316982,0.00307046,0.00303561,0.00293575,0.0028841,0.00285284,0.00277647,0.00273949,0.00266295,0.00255831,0.00251499,0.00246623,0.00243464,0.00241937,0.00235306,0.00232614,0.00225741,0.00218091,0.0021302,0.00205022,0.00201654,0.00194522,0.00187635,0.00182548,0.0017735,0.00172878,0.00169442,0.00162852,0.00157855,0.00153443,0.00149397,0.00147055,0.00144682,0.00141845,0.00136882,0.00131586,0.00130082,0.00127695,0.00126218,0.00123721,0.00121279,0.00118854,0.00116046,0.00113674,0.00111913,0.00108491,0.00107559,0.00106259,0.00105148,0.00103479,0.00101689,0.000972695,0.000948257,0.000924493,0.000900991,0.000889668,0.000865146,0.000854104,0.00083534,0.000815753,0.000794719,0.000781337,0.0007668
38,0.000752243,0.000736664,0.000724082,0.000712894,0.000705973,0.00068831,0.000680327,0.000669642,0.000652806,0.000636146,0.00062849,0.000611093,0.000596294,0.000587972,0.000571393,0.0005645,0.000553982,0.000543961,0.000523867,0.000513997,0.000489447,0.00047912,0.00045258,0.000438637,0.000414931,0.000399876,0.000387653,0.000370544,0.000352613,0.000345222,0.00032418,0.000312893,0.000301045,0.000289058,0.00028623,0.000278805,0.000270802,0.000265514,0.00025398,0.000249552,0.000242674,0.000239684,0.000237239,0.000233503,0.000229487,0.000223591,0.000218308,0.00021671,0.000209341,0.000205909,0.000201536,0.000191555,0.000189009,0.000186479,0.000182105,0.000181642,0.000178786,0.000174499,0.0001717,0.000169812,0.00016606,0.000164195,0.000158742,0.000155993,0.000154637,0.00015152,0.000148165,0.000144351,0.000139104,0.000134674,0.000131545,0.000125267,0.000122249,0.000116809,0.000113561,0.000112219,0.000109435,0.000107821,0.000103422,9.89701e-05,9.3618e-05,9.21414e-05,8.77116e-05,8.47192e-05,8.16515e-05,7.70723e-05,7.37032e-05,7.19537e-05,6.85302e-05,6.67847e-05,6.47584e-05,5.97291e-05,5.80279e-05,5.57261e-05,5.23545e-05,5.13809e-05,4.82288e-05,4.52392e-05,4.46737e-05,4.25417e-05,4.10962e-05,4.06973e-05,3.83544e-05,3.76308e-05,3.71604e-05,3.50322e-05,3.4915e-05,3.28554e-05,3.18809e-05,3.08061e-05,3.05929e-05,3.03797e-05,2.98526e-05,2.76428e-05,2.70656e-05,2.4861e-05,2.3955e-05,2.35059e-05,2.26474e-05,2.17991e-05,2.13365e-05,1.90338e-05,1.89619e-05,1.77768e-05,1.65525e-05,1.5912e-05,1.50583e-05,1.45103e-05,1.38969e-05,1.27266e-05,1.23033e-05,1.18815e-05,1.14397e-05,1.1183e-05,1.09902e-05,1.04272e-05,1.01992e-05,9.93164e-06,9.80852e-06,9.76748e-06,9.58319e-06,9.04396e-06,8.69585e-06,8.12193e-06,7.92576e-06,7.53756e-06,7.44051e-06,7.25383e-06,6.81345e-06,6.35175e-06,6.03739e-06,5.70671e-06,5.52847e-06,5.29026e-06,5.05241e-06,4.86868e-06,4.58842e-06,4.46719e-06,4.36083e-06,4.33986e-06,4.02183e-06,3.89227e-06,3.67266e-06,3.65448e-06,3.6363e-06,3.6363e-06,3.51422e-06,3.49647e-06,3.3
9388e-06,3.37656e-06,3.29215e-06,3.17483e-06,3.04735e-06,2.89107e-06,2.81294e-06,2.73891e-06,2.65086e-06,2.65086e-06,2.57091e-06,2.49167e-06,2.4517e-06,2.36123e-06,2.28479e-06,2.25954e-06,2.25954e-06,2.11017e-06,2.09825e-06,1.98168e-06,1.98168e-06,1.91246e-06,1.90154e-06,1.78968e-06,1.65854e-06,1.5101e-06,1.41572e-06,1.40744e-06,1.36053e-06,1.26748e-06,1.17448e-06,1.09307e-06,9.51557e-07,9.04001e-07,8.65244e-07,8.26892e-07,7.89521e-07,7.62297e-07,7.30506e-07,7.30506e-07,6.94657e-07,6.6794e-07,6.59485e-07,5.98947e-07,5.87279e-07,5.40297e-07,5.25985e-07,4.92848e-07,4.86003e-07,4.58325e-07,4.58325e-07,3.99618e-07,3.93868e-07,3.7322e-07,3.67732e-07,3.49119e-07,3.46494e-07,3.43869e-07,3.27494e-07,2.96977e-07,2.65904e-07,2.50666e-07,2.35632e-07,2.35632e-07,2.33791e-07,2.33791e-07,2.22658e-07,1.96628e-07,1.86279e-07,1.7593e-07,1.73115e-07,1.73115e-07,1.71708e-07,1.68893e-07,1.67486e-07,1.56852e-07,1.56852e-07,1.56852e-07,1.29172e-07,1.29172e-07,1.06377e-07,1.06377e-07,9.87785e-08,9.87785e-08,9.01683e-08,8.93909e-08,7.29759e-08,6.67715e-08,6.67715e-08,5.5999e-08,5.15348e-08,4.68146e-08,4.68146e-08,4.6389e-08,4.17849e-08,3.74993e-08,3.37656e-08,3.09518e-08,2.75888e-08,2.75888e-08,2.45888e-08,2.45888e-08,2.19152e-08,2.1696e-08,2.12577e-08,2.12577e-08,1.89347e-08,1.8343e-08,1.81458e-08,1.79485e-08,1.77513e-08,1.57789e-08,1.56036e-08,1.35582e-08,1.35582e-08,1.32465e-08,1.30907e-08,1.30907e-08,1.26231e-08,1.1082e-08,1.09435e-08,1.06665e-08,1.06665e-08,1.06665e-08,1.05279e-08,1.05279e-08,1.05279e-08,1.05279e-08,1.05279e-08,8.96953e-09,8.96953e-09,7.63622e-09,7.63622e-09,5.45444e-09,5.45444e-09,3.89603e-09,3.84192e-09,3.21964e-09,3.21964e-09,3.21964e-09,3.12494e-09,2.19828e-09,2.19828e-09,2.16446e-09,2.16446e-09,2.063e-09,1.6504e-09,1.62335e-09,1.25539e-09,1.25539e-09,1.23374e-09,1.2121e-09,1.19045e-09,1.16881e-09,9.35048e-10,9.35048e-10,9.35048e-10,9.17732e-10,9.17732e-10,9.17732e-10,9.17732e-10,9.17732e-10,9.17732e-10,9.17732e-10,4.50208e-10,4.32892e-10,2.12117e-10,2.07788e-10,2
.07788e-10,2.03459e-10,2.03459e-10,2.03459e-10,9.95653e-11,9.74008e-11,9.74008e-11,9.74008e-11,9.52363e-11,9.30719e-11,9.09074e-11,4.54537e-11,4.54537e-11,2.27269e-11,2.21857e-11,2.21857e-11,2.16446e-11,2.16446e-11;AC=2;AN=2	GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB	1/1:8:71.54:-8,-2,-1:9:3:5:3:5:0:0:0:3:5:0.62
--- a/test-data/test_vcf2hrdetect.tab	Wed Jan 24 19:26:57 2024 +0000
+++ b/test-data/test_vcf2hrdetect.tab	Fri Oct 17 17:21:17 2025 +0000
@@ -1,180 +1,263 @@
 chr1	pos1	chr2	pos2	type
-10	132635667	10	132636051	INV
-18	77309929	18	77312095	DEL
-10	132988685	10	132991206	DEL
-10	132858454	10	132858819	DEL
-5	26796704	5	26801897	DEL
-5	147553039	5	147554616	INV
-20	3821170	20	3825120	DEL
-6	26625483	7	68758443	TRA
-6	161336425	7	85956546	TRA
+1	14436660	1	14438563	DUP
+1	28515659	19	24027918	TRA
 1	42552553	4	44842163	TRA
-4	73831211	4	73831818	DEL
-8	59616000	16	3354097	TRA
-8	30145409	17	7167951	TRA
-3	158034493	6	32688592	TRA
-8	140475500	12	58359059	TRA
-11	99185461	11	99186696	DEL
-16	33485944	16	33486280	DEL
+1	43694066	1	43695538	DEL
+1	44059282	1	44059890	INV
+1	44059890	1	44059282	INV
+1	64897140	6	159204434	TRA
+1	66804423	13	59377693	TRA
+1	80221789	1	80223032	DEL
+1	80794627	1	80795137	INV
+1	80795137	1	80794627	INV
+1	91151631	8	85437297	TRA
+1	92232065	1	92233362	DEL
+1	105428633	7	93357413	TRA
+1	119401195	15	51189306	TRA
+1	168024578	19	24033135	TRA
+1	221827800	13	61315891	TRA
+1	234805693	16	59323740	TRA
+2	12019022	7	52399206	TRA
+2	18118870	2	18120031	DEL
+2	19002880	2	19003387	DEL
+2	34797601	2	34801579	DEL
+2	34965732	2	34967261	DEL
+2	48990532	2	48991191	DEL
+2	76773562	2	76775444	DEL
+2	77358259	18	59713754	TRA
+2	78091959	4	174044216	TRA
+2	120417073	2	120418454	DUP
+2	125766561	2	125768399	DUP
+2	153997618	17	74377266	TRA
+2	156774338	2	156776256	DEL
+2	160095376	2	160095824	INV
+2	160095824	2	160095376	INV
+2	178351450	19	52567370	TRA
+2	178351470	19	52567638	TRA
+2	179315148	2	179315689	DEL
+2	189119618	2	189120979	DEL
+2	204273805	4	68944782	TRA
 3	12818457	5	144122990	TRA
+3	32203783	3	32204153	DEL
+3	38399708	8	75876952	TRA
+3	48372309	7	32225987	TRA
+3	54428366	12	28226427	TRA
+3	75960769	11	130675918	TRA
 3	105828859	5	141379626	TRA
-8	96265720	19	18835613	TRA
-6	32315438	6	32316258	DEL
-10	55893654	10	55894125	DEL
-7	97993595	7	97993987	DEL
-5	53122773	5	53125484	INV
-6	65022877	6	65023371	DUP
-19	15046341	19	15049471	DEL
-1	105428633	7	93357413	TRA
-1	91151631	8	85437297	TRA
-13	38071947	13	38085574	DEL
-1	64897140	6	159204434	TRA
-2	156774338	2	156776256	DEL
-14	23105069	14	23107950	DEL
-21	32956225	X	47183861	TRA
-8	16821879	8	16822259	DEL
-8	140475502	11	56303340	TRA
 3	124936235	3	124937163	DEL
-7	98015910	X	136092975	TRA
-7	127930431	16	76865795	TRA
-11	106143090	11	106143794	INV
-6	37294925	6	37295284	DEL
-2	179315148	2	179315689	DEL
+3	129763389	3	129806742	DEL
+3	144517821	3	144518457	DEL
+3	151748833	3	176444683	INV
+3	158034493	6	32688592	TRA
+3	176444683	3	151748833	INV
+3	182298978	11	94948981	TRA
+3	182299013	9	1940734	TRA
+3	184156702	20	231409	TRA
+3	190785116	3	190789319	DEL
+3	192376521	10	65545580	TRA
+4	44842163	1	42552553	TRA
+4	68944782	2	204273805	TRA
+4	73831211	4	73831818	DEL
+4	81136688	6	68923562	TRA
+4	88268261	4	88274334	DEL
+4	99513602	6	32589570	TRA
+4	102787795	4	102788358	DEL
+4	144573271	4	144573612	DEL
+4	151011309	4	151011648	DUP
 4	162642267	X	3599701	TRA
 4	162642407	X	3599706	TRA
-4	144573271	4	144573612	DEL
-7	86615393	7	86615759	DEL
-2	48990532	2	48991191	DEL
-X	114216686	X	114544524	INV
-2	160095376	2	160095824	INV
-17	46615627	17	46617222	DUP
-17	45634774	17	45635284	DEL
-20	62949719	20	62959202	DEL
-2	18118870	2	18120031	DEL
-10	59257660	10	59257985	INV
-9	127885505	18	71711520	TRA
-10	51019958	12	46175132	TRA
-10	59256942	10	59257951	INV
-10	68673027	16	59976476	TRA
-7	100056291	7	129157154	DUP
+4	174044216	2	78091959	TRA
+4	175534267	10	123120353	TRA
+4	183256660	8	131547788	TRA
+4	186382139	9	129005120	TRA
+5	1178285	5	1180725	DEL
+5	5595231	15	52589534	TRA
+5	26796704	5	26801897	DEL
 5	37709852	7	8663298	TRA
-3	129763389	3	129806742	DEL
+5	53122606	5	53125511	INV
+5	53122773	5	53125484	INV
+5	53125484	5	53122773	INV
+5	53125511	5	53122606	INV
 5	54865791	10	75489275	TRA
 5	54865801	10	75489000	TRA
-3	144517821	3	144518457	DEL
+5	95966521	5	95967361	DEL
 5	103062313	5	103062635	DEL
+5	108595090	5	108601127	DEL
+5	127336004	5	127336923	DEL
+5	141379626	3	105828859	TRA
+5	144122990	3	12818457	TRA
+5	147553039	5	147554616	INV
+5	147554616	5	147553039	INV
+5	157223490	5	176652060	INV
+5	157223548	11	3760878	TRA
+5	169597700	5	169598748	INV
+5	169598748	5	169597700	INV
+5	176651964	11	3760899	TRA
+5	176652060	5	157223490	INV
+5	178109055	5	178113407	DEL
+6	17608998	X	117432517	TRA
+6	20169115	6	108322150	DUP
+6	26625483	7	68758443	TRA
+6	32315438	6	32316258	DEL
+6	32589570	4	99513602	TRA
+6	32688592	3	158034493	TRA
+6	37294925	6	37295284	DEL
+6	37295025	12	22085955	TRA
+6	65022877	6	65023371	DUP
+6	68923562	4	81136688	TRA
+6	72863847	6	72873539	DEL
+6	74156485	14	71480241	TRA
 6	74932726	6	74933069	DEL
-2	189119618	2	189120979	DEL
-16	13294470	16	13296490	DEL
-6	72863847	6	72873539	DEL
-4	81136688	6	68923562	TRA
-20	61724700	20	61725614	DEL
-4	99513602	6	32589570	TRA
+6	89921735	6	89922171	DEL
+6	107307808	8	105231718	TRA
 6	136589486	6	136590574	DEL
-17	70815221	17	70821355	DEL
-17	68159938	17	68160278	DEL
-10	35593223	10	35593548	DEL
-4	88268261	4	88274334	DEL
-10	45158118	10	45158533	DEL
-14	90299534	14	90302487	INV
-9	141058509	10	107209	TRA
-3	151748833	3	176444683	INV
-1	92232065	1	92233362	DEL
-22	19099927	22	19100621	DEL
-22	17770355	22	17779150	DEL
-3	32203783	3	32204153	DEL
-13	35787786	X	11952984	TRA
-14	106484224	15	22486809	TRA
-2	19002880	2	19003387	DEL
-X	55702382	X	55709885	DEL
-4	102787795	4	102788358	DEL
-12	30478280	12	30480988	DEL
-15	75867566	16	89281701	TRA
-4	186382139	9	129005120	TRA
-4	183256660	8	131547788	TRA
-2	12019022	7	52399206	TRA
+6	159204434	1	64897140	TRA
+6	161336425	7	85956546	TRA
+7	8663298	5	37709852	TRA
+7	32225987	3	48372309	TRA
+7	52399206	2	12019022	TRA
+7	65857652	7	65860106	DEL
+7	68758443	6	26625483	TRA
+7	70420972	7	70438890	INV
+7	70438890	7	70420972	INV
 7	76134084	7	76140082	DEL
-4	175534267	10	123120353	TRA
-15	62706095	15	62707781	DEL
-14	47118175	14	63226299	INV
-12	12026417	12	12027146	DEL
-14	60341404	14	60343237	DEL
-14	61058056	14	61059493	DEL
-6	17608998	X	117432517	TRA
-5	108595090	5	108601127	DEL
-11	93695221	11	93702091	DEL
-2	34965732	2	34967261	DEL
-2	34797601	2	34801579	DEL
-6	74156485	14	71480241	TRA
+7	85956546	6	161336425	TRA
+7	86615393	7	86615759	DEL
+7	93357413	1	105428633	TRA
+7	93541820	7	93542546	DEL
+7	97993595	7	97993987	DEL
+7	98015910	X	136092975	TRA
+7	100056291	7	129157154	DUP
+7	111053752	12	108203254	TRA
+7	127930431	16	76865795	TRA
 7	151230203	7	151231867	DEL
 7	156387047	7	156387226	INV
-1	14436660	1	14438563	DUP
-5	127336004	5	127336923	DEL
-5	157223490	5	176652060	INV
-5	1178285	5	1180725	DEL
-3	190785116	3	190789319	DEL
-11	97172221	11	97172970	DEL
-14	80106294	14	80115045	DEL
-14	65842539	14	65843134	INV
-16	69854786	16	69859026	DUP
-15	39744398	15	39744849	DEL
-7	65857652	7	65860106	DEL
-1	43694066	1	43695538	DEL
-1	44059282	1	44059890	INV
-9	104714849	9	104724476	DEL
+7	156387226	7	156387047	INV
+8	11245570	8	11247212	DEL
+8	14588058	8	14588822	DEL
+8	16821879	8	16822259	DEL
+8	30145409	17	7167951	TRA
+8	59616000	16	3354097	TRA
+8	75876952	3	38399708	TRA
+8	85437297	1	91151631	TRA
+8	96265720	19	18835613	TRA
+8	105231718	6	107307808	TRA
+8	118980771	8	118981151	DEL
+8	131547788	4	183256660	TRA
+8	140475500	12	58359059	TRA
+8	140475502	11	56303340	TRA
+9	1940734	3	182299013	TRA
 9	104182136	9	104182429	DUP
-7	70420972	7	70438890	INV
-6	89921735	6	89922171	DEL
-3	182299013	9	1940734	TRA
-5	157223548	11	3760878	TRA
-3	192376521	10	65545580	TRA
-3	48372309	7	32225987	TRA
-3	38399708	8	75876952	TRA
-5	95966521	5	95967361	DEL
-6	107307808	8	105231718	TRA
-3	182298978	11	94948981	TRA
-3	75960769	11	130675918	TRA
-6	37295025	12	22085955	TRA
-4	151011309	4	151011648	DUP
-6	20169115	6	108322150	DUP
+9	104714849	9	104724476	DEL
+9	110033442	9	110035532	DEL
+9	127885505	18	71711520	TRA
+9	129005120	4	186382139	TRA
 9	139427511	9	139427797	DEL
+9	141058509	10	107209	TRA
+10	107209	9	141058509	TRA
+10	35593223	10	35593548	DEL
+10	45158118	10	45158533	DEL
+10	51019958	12	46175132	TRA
+10	55893654	10	55894125	DEL
+10	59256942	10	59257951	INV
+10	59257660	10	59257985	INV
+10	59257951	10	59256942	INV
+10	59257985	10	59257660	INV
+10	65545580	3	192376521	TRA
+10	68673027	16	59976476	TRA
+10	75489000	5	54865801	TRA
+10	75489275	5	54865791	TRA
+10	87242583	10	87243046	DUP
+10	123120353	4	175534267	TRA
+10	132635667	10	132636051	INV
+10	132636051	10	132635667	INV
+10	132858454	10	132858819	DEL
+10	132988685	10	132991206	DEL
+11	3760878	5	157223548	TRA
+11	3760899	5	176651964	TRA
+11	56303340	8	140475502	TRA
+11	93695221	11	93702091	DEL
+11	94948981	3	182298978	TRA
+11	97172221	11	97172970	DEL
+11	99185461	11	99186696	DEL
+11	106143090	11	106143794	INV
+11	106143794	11	106143090	INV
+11	130675918	3	75960769	TRA
+12	12026417	12	12027146	DEL
+12	22085955	6	37295025	TRA
+12	28226427	3	54428366	TRA
+12	30478280	12	30480988	DEL
+12	46175132	10	51019958	TRA
+12	58359059	8	140475500	TRA
+12	104359630	12	125801144	INV
+12	108203254	7	111053752	TRA
+12	125801144	12	104359630	INV
+13	35787786	X	11952984	TRA
+13	38071947	13	38085574	DEL
+13	59377693	1	66804423	TRA
+13	61315891	1	221827800	TRA
 13	74915300	13	74916746	DEL
-1	168024578	19	24033135	TRA
-2	125766561	2	125768399	DUP
-2	120417073	2	120418454	DUP
+13	89421953	13	89422932	DEL
+14	23105069	14	23107950	DEL
 14	32953296	14	32954345	DEL
-7	93541820	7	93542546	DEL
-8	11245570	8	11247212	DEL
-7	111053752	12	108203254	TRA
-1	234805693	16	59323740	TRA
-1	80794627	1	80795137	INV
-9	110033442	9	110035532	DEL
-1	80221789	1	80223032	DEL
-1	66804423	13	59377693	TRA
-1	221827800	13	61315891	TRA
-1	119401195	15	51189306	TRA
+14	41829108	14	41980350	DEL
+14	42486317	14	42486634	DEL
+14	47118175	14	63226299	INV
+14	60341404	14	60343237	DEL
+14	61058056	14	61059493	DEL
+14	63226299	14	47118175	INV
+14	65842539	14	65843134	INV
+14	65843134	14	65842539	INV
+14	71480241	6	74156485	TRA
+14	80106294	14	80115045	DEL
+14	90299534	14	90302487	INV
+14	90302487	14	90299534	INV
+14	106484224	15	22486809	TRA
+15	22486809	14	106484224	TRA
+15	39744398	15	39744849	DEL
+15	51189306	1	119401195	TRA
+15	52589534	5	5595231	TRA
+15	62706095	15	62707781	DEL
+15	75867566	16	89281701	TRA
+16	3354097	8	59616000	TRA
+16	13294470	16	13296490	DEL
+16	33485944	16	33486280	DEL
+16	59323740	1	234805693	TRA
+16	59976476	10	68673027	TRA
+16	69854786	16	69859026	DUP
+16	76865795	7	127930431	TRA
 16	85302496	16	85304389	DEL
-8	118980771	8	118981151	DEL
-3	54428366	12	28226427	TRA
+16	89281701	15	75867566	TRA
+17	7167951	8	30145409	TRA
+17	45634774	17	45635284	DEL
+17	46615627	17	46617222	DUP
+17	68159938	17	68160278	DEL
+17	70815221	17	70821355	DEL
+17	74377266	2	153997618	TRA
 18	39585688	18	39586313	DEL
-3	184156702	20	231409	TRA
-13	89421953	13	89422932	DEL
-5	169597700	5	169598748	INV
-2	76773562	2	76775444	DEL
-5	178109055	5	178113407	DEL
-10	87242583	10	87243046	DUP
-8	14588058	8	14588822	DEL
+18	59713754	2	77358259	TRA
+18	71711520	9	127885505	TRA
+18	77309929	18	77312095	DEL
+19	15046341	19	15049471	DEL
+19	18835613	8	96265720	TRA
+19	24027918	1	28515659	TRA
+19	24033135	1	168024578	TRA
 19	51077891	19	51082521	DEL
-2	78091959	4	174044216	TRA
-2	204273805	4	68944782	TRA
-5	176651964	11	3760899	TRA
-5	5595231	15	52589534	TRA
-1	28515659	19	24027918	TRA
-2	153997618	17	74377266	TRA
-12	104359630	12	125801144	INV
-2	178351470	19	52567638	TRA
-2	178351450	19	52567370	TRA
-2	77358259	18	59713754	TRA
-14	42486317	14	42486634	DEL
-14	41829108	14	41980350	DEL
-5	53122606	5	53125511	INV
+19	52567370	2	178351450	TRA
+19	52567638	2	178351470	TRA
+20	231409	3	184156702	TRA
+20	3821170	20	3825120	DEL
+20	61724700	20	61725614	DEL
+20	62949719	20	62959202	DEL
+21	32956225	X	47183861	TRA
+22	17770355	22	17779150	DEL
+22	19099927	22	19100621	DEL
+X	3599701	4	162642267	TRA
+X	3599706	4	162642407	TRA
+X	11952984	13	35787786	TRA
+X	47183861	21	32956225	TRA
+X	55702382	X	55709885	DEL
+X	114216686	X	114544524	INV
+X	114544524	X	114216686	INV
+X	117432517	6	17608998	TRA
+X	136092975	7	98015910	TRA
--- a/vcf2hrdetect.py	Wed Jan 24 19:26:57 2024 +0000
+++ b/vcf2hrdetect.py	Fri Oct 17 17:21:17 2025 +0000
@@ -1,41 +1,119 @@
+#!/usr/bin/env python
+import argparse
+import re
 import sys
 
-handle = open(sys.argv[1], 'r')
-vcfdict = dict()
-tabdict = dict()
-for line in handle:
-    if line[0] == "#":
-        continue
-    else:
-        tabline = line[:-1].split("\t")
-        vcfdict[tabline[2]] = tabline
-for id in vcfdict.keys():
-    if "_1" in id:
-        newid = id[:-2]
-        pointbreak = vcfdict[id][4]
-        if "]" in pointbreak:
-            coordbreak = pointbreak.split("]")[1].split(":")[1]
-            chrom = pointbreak.split("]")[1].split(":")[0]
-        elif "[" in pointbreak:
-            coordbreak = pointbreak.split("[")[1].split(":")[1]
-            chrom = pointbreak.split("[")[1].split(":")[0]
-        if vcfdict[id][0] == chrom:
-            tabdict[newid] = [chrom, vcfdict[id][1], chrom, coordbreak, "INV"]
-        else:
-            tabdict[newid] = [vcfdict[id][0], vcfdict[id][1],
-                              chrom, coordbreak, "TRA"]
-for id in list(vcfdict):
-    if "_" in id:
-        del vcfdict[id]
-for id in vcfdict.keys():  # only sv that are not of type TRA or INV
-    chr1 = vcfdict[id][0]
-    chr2 = vcfdict[id][0]
-    pos1 = vcfdict[id][1]
-    pos2 = vcfdict[id][7].split("END=")[1].split(";")[0]
-    type = vcfdict[id][7].split("SVTYPE=")[1].split(";")[0]
-    tabdict[id] = [chr1, pos1, chr2, pos2, type]
-out = open(sys.argv[2], 'w')
-out.write("chr1\tpos1\tchr2\tpos2\ttype\n")
-for key in tabdict:
-    line = "\t".join(tabdict[key]) + "\n"
-    out.write(line)
+
+def create_arg_parser():
+    """
+    Build the command-line interface of the converter.
+
+    Returns:
+        argparse.ArgumentParser: Parser that expects two positional
+        arguments, in this order: 'vcf_file' (the input VCF) and
+        'output_file' (the tabular file to write).
+    """
+    summary = (
+        "Convert a VCF file from lumpy-smoove to a tabular format "
+        "compatible with the HRDetect pipeline."
+    )
+    cli = argparse.ArgumentParser(description=summary)
+    cli.add_argument('vcf_file', help='Path to the input VCF file.')
+    cli.add_argument('output_file', help='Path to the output tabular file.')
+    return cli
+
+
+def parse_breakend_alt(alt_field):
+    """
+    Extract the mate chromosome and position from a breakend ALT field.
+
+    Args:
+        alt_field (str): The ALT field (column 5) of a VCF line.
+
+    Returns:
+        tuple: (chromosome, position), both as strings, or (None, None)
+               if no ]chr:pos] or [chr:pos[ locus is found.
+    """
+    # The locus sits between two identical brackets: ]chr:pos] or [chr:pos[.
+    # Contig names may contain ':' themselves (e.g. HLA alleles), so the
+    # name runs greedily up to the LAST colon before the closing bracket.
+    pattern = (
+        r"(?P<bracket>[\[\]])"
+        r"(?P<chrom>[^\[\]]+):(?P<pos>\d+)"
+        r"(?P=bracket)"
+    )
+    match = re.search(pattern, alt_field)
+
+    if not match:
+        return None, None
+
+    return match.group('chrom'), match.group('pos')
+
+
+def process_vcf(vcf_path, output_path):
+    """
+    Convert lumpy-smoove VCF records into the 5-column HRDetect table.
+
+    BND records take their mate from ALT and are typed INV (same chromosome)
+    or TRA; all other records use INFO END and keep their SVTYPE. Lines that
+    lack 8 columns, SVTYPE, or the needed END / breakend ALT are skipped.
+
+    Args:
+        vcf_path (str): Path to the input VCF file.
+        output_path (str): Path to the output tabular file.
+
+    Raises:
+        SystemExit: Exit status 1 on a missing file or other IO error.
+    """
+    header = ["chr1", "pos1", "chr2", "pos2", "type"]
+    # Anchor each INFO key at the field start or right after ';' so 'END='
+    # cannot match inside 'CIEND=' / 'CIEND95=' when those keys come first.
+    svtype_re = re.compile(r'(?:^|;)SVTYPE=([^;]+)')
+    end_re = re.compile(r'(?:^|;)END=([^;]+)')
+    try:
+        with open(vcf_path, 'r') as infile, open(output_path, 'w') as outfile:
+            outfile.write("\t".join(header) + "\n")
+            for line in infile:
+                if line.startswith('#'):
+                    continue
+                fields = line.strip().split('\t')
+                if len(fields) < 8:
+                    continue
+                chrom1, pos1, info = fields[0], fields[1], fields[7]
+                svtype_match = svtype_re.search(info)
+                if not svtype_match:
+                    continue  # Skip lines without SVTYPE tag
+                svtype = svtype_match.group(1)
+
+                if svtype == "BND":  # Breakend (INV or TRA)
+                    chrom2, pos2 = parse_breakend_alt(fields[4])
+                    if not (chrom2 and pos2):
+                        continue
+                    event_type = "INV" if chrom1 == chrom2 else "TRA"
+                else:  # Other SV types (DEL, DUP, etc.)
+                    end_match = end_re.search(info)
+                    if not end_match:
+                        continue
+                    chrom2, pos2 = chrom1, end_match.group(1)
+                    event_type = svtype
+                row = [chrom1, pos1, chrom2, pos2, event_type]
+                outfile.write("\t".join(row) + "\n")
+    except FileNotFoundError as e:
+        # Report the path that actually failed; it may be the output path.
+        missing = e.filename or vcf_path
+        print(f"Error: File '{missing}' not found.", file=sys.stderr)
+        sys.exit(1)
+    except IOError as e:
+        print(f"IO Error: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+def main():
+    """Command-line entry point: parse argv, then run the conversion."""
+    cli_args = create_arg_parser().parse_args()
+    source, destination = cli_args.vcf_file, cli_args.output_file
+    process_vcf(source, destination)
+
+
+if __name__ == '__main__':
+    main()
--- a/vcf2hrdetect.xml	Wed Jan 24 19:26:57 2024 +0000
+++ b/vcf2hrdetect.xml	Fri Oct 17 17:21:17 2025 +0000
@@ -1,38 +1,44 @@
-<tool id="vcf2hrdetect" name="Convert lumpy-smoove vcf" version="3">
-    <description>to tabular hrdetect</description>
-    <requirements>
-        <requirement type="package" version="0.2.8">smoove</requirement>
-    </requirements>
+<tool id="vcf2hrdetect" name="Convert VCF for HRDetect" version="4">
+    <description>from lumpy-smoove output to a 5-column tabular format</description>
     <macros>
-        <import>macro_lumpy_smoove.xml</import>
+        <!-- Shared macro definitions now come from macros.xml instead of macro_lumpy_smoove.xml -->
+        <import>macros.xml</import>
     </macros>
-    <stdio>
-        <exit_code range="1:" level="fatal" description="Tool exception" />
-    </stdio>
+    
+    <!-- Requirements and stdio handling are supplied by imported macros rather than declared inline -->
+    <expand macro="vcf2hrdetect_requirements" />
+    <expand macro="stdio" />
+
     <command detect_errors="exit_code"><![CDATA[
     @pipefail@
     python '$__tool_directory__'/vcf2hrdetect.py '$lumpy_smoove_vcf' '$hrdetect_tabular'
     ]]></command>
+    
     <inputs>
-        <param name="lumpy_smoove_vcf" type="data" format="vcf" label="lumpy-smoove vcf to process"/>
-   </inputs>
+        <param name="lumpy_smoove_vcf" type="data" format="vcf" label="Input lumpy-smoove VCF" help="VCF file containing structural variants detected by lumpy-smoove."/>
+    </inputs>
 
     <outputs>
-        <data format="tabular" name="hrdetect_tabular" label="tabular hrdetec SVs" />
+        <data format="tabular" name="hrdetect_tabular" label="HRDetect formatted structural variants" />
     </outputs>
 
     <tests>
-        <test>
+        <test expect_num_outputs="1">
             <param name="lumpy_smoove_vcf" value="test_vcf2hrdetect.vcf" ftype="vcf" />
             <output name="hrdetect_tabular" file="test_vcf2hrdetect.tab" sort="true" ftype="tabular" />
         </test>
     </tests>
+    
     <help>
-**vcf2hrdetect** convert a vcf produced by lumpy-smoove to a somatic_sv.tsv tabular file compatible with
-the HRDetect pipeline (https://github.com/eyzhao/hrdetect-pipeline/blob/master/data/example/patients/patient1/sample1/somatic_sv.tsv)
+**What it does**
+
+This tool converts a VCF file produced by **lumpy-smoove** into the 5-column tabular format (chr1, pos1, chr2, pos2, type) required by the `HRDetect`_ pipeline.
+
+.. _HRDetect: https://github.com/eyzhao/hrdetect-pipeline/
     </help>
 
     <citations>
     <citation type="doi">10.1038/nm.4292</citation>
   </citations>
 </tool>
+