Mercurial > repos > artbio > lumpy_smoove
changeset 11:5a326a6fa105 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
author | artbio |
---|---|
date | Fri, 17 Oct 2025 17:21:17 +0000 |
parents | 8711df965d4b |
children | |
files | lumpy_smoove.xml macro_lumpy_smoove.xml macros.xml test-data/result-1.vcf test-data/result-2.vcf test-data/result-3.vcf test-data/result-4.vcf test-data/result-5.vcf test-data/result-6.vcf test-data/result_cohort.vcf test-data/result_paired.vcf test-data/result_single.vcf test-data/test_vcf2hrdetect.tab vcf2hrdetect.py vcf2hrdetect.xml |
diffstat | 15 files changed, 702 insertions(+), 776 deletions(-) [+] |
line wrap: on
line diff
--- a/lumpy_smoove.xml Wed Jan 24 19:26:57 2024 +0000 +++ b/lumpy_smoove.xml Fri Oct 17 17:21:17 2025 +0000 @@ -1,92 +1,97 @@ -<tool id="lumpy_smoove" name="lumpy_smoove" version="0.2.8+galaxy1"> +<tool id="lumpy_smoove" name="lumpy_smoove" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@"> <description>find structural variants using the smoove workflow</description> <macros> - <import>macro_lumpy_smoove.xml</import> + <import>macros.xml</import> </macros> - <requirements> - <requirement type="package" version="0.7.1">svtyper</requirement> - <requirement type="package" version="0.2.8">smoove</requirement> - </requirements> - <stdio> - <exit_code range="1:" level="fatal" description="Tool exception" /> - </stdio> + <expand macro="lumpy_smoove_requirements" /> + <expand macro="stdio" /> <command detect_errors="exit_code"><![CDATA[ @pipefail@ @set_fasta_index@ + #if $set_plan.plan_choice=='pair': - ln -s $set_plan.normal_bam normal.bam && - ln -f -s $set_plan.normal_bam.metadata.bam_index normal.bam.bai && - ln -s $set_plan.tumor_bam tumor.bam && - ln -f -s $set_plan.tumor_bam.metadata.bam_index tumor.bam.bai && + ln -s '$set_plan.normal_bam' normal.bam && + ln -f -s '$set_plan.normal_bam.metadata.bam_index' normal.bam.bai && + ln -s '$set_plan.tumor_bam' tumor.bam && + ln -f -s '$set_plan.tumor_bam.metadata.bam_index' tumor.bam.bai && #elif $set_plan.plan_choice=='single': - ln -s $set_plan.single_bam single.bam && - ln -f -s $set_plan.single_bam.metadata.bam_index single.bam.bai && + ln -s '$set_plan.single_bam' single.bam && + ln -f -s '$set_plan.single_bam.metadata.bam_index' single.bam.bai && #else: #for $sample in $set_plan.cohort: - ln -s $sample ${sample.element_identifier}.bam && - ln -f -s $sample.metadata.bam_index ${sample.element_identifier}.bam.bai && + ln -s '$sample' '${sample.element_identifier}.bam' && + ln -f -s '$sample.metadata.bam_index' '${sample.element_identifier}.bam.bai' && #end for #end if - + smoove call --name output #if 
$set_exclusion.choices=="yes": - --exclude $bedmask + --exclude '$bedmask' #end if --fasta reference.fa --processes \${GALAXY_SLOTS:-4} --genotype - #if $prpos=="no": + #if $removepr: --removepr #end if - *.bam && - gunzip -c output-smoove.genotyped.vcf.gz > $vcf_call + *.bam && + gunzip -c output-smoove.genotyped.vcf.gz > '$vcf_call' ]]></command> <inputs> <expand macro="reference_source_conditional" /> <conditional name="set_plan"> - <param name="plan_choice" type="select" label="Analyse a single Bam or a pair of Bam (eg normal/tumor)" display="radio"> - <option value="pair" selected="true">A pair of Bam files</option> - <option value="single">A single Bam</option> - <option value="cohort">a small cohort of Bam files (less than ~40)</option> + <param name="plan_choice" type="select" label="Analysis mode" display="radio"> + <option value="pair" selected="true">A pair of BAM files (e.g., normal/tumor)</option> + <option value="single">A single BAM file</option> + <option value="cohort">A small cohort of BAM files (less than ~40)</option> </param> <when value="pair"> - <param format="bam" name="normal_bam" type="data" label="BAM alignment from the normal sample"/> - <param format="bam" name="tumor_bam" type="data" label="BAM alignment from the tumor sample"/> + <param format="bam" name="normal_bam" type="data" label="Normal/Reference sample BAM" /> + <param format="bam" name="tumor_bam" type="data" label="Tumor/Case sample BAM" /> </when> <when value="single"> - <param format="bam" name="single_bam" type="data" label="BAM alignment from a single sample"/> + <param format="bam" name="single_bam" type="data" label="Single sample BAM file" /> </when> <when value="cohort"> - <param name="cohort" type="data_collection" format="bam" label="A collection of bam files" multiple="true"/> + <param name="cohort" type="data_collection" format="bam" label="A collection of BAM files" /> </when> - </conditional> + </conditional> - <conditional name="set_exclusion"> - <param 
name="choices" type="select" label="exclude regions with a bed file" display="radio"> + <param name="choices" type="select" label="Exclude genomic regions?" display="radio"> <option value="no" selected="true">No</option> <option value="yes">Yes</option> </param> <when value="yes"> - <param format="bed" name="bedmask" type="data" label="BED regions to be excluded for the analysis"/> - </when> - <when value="no"> + <param format="bed" name="bedmask" type="data" label="BED file with regions to exclude" help="This is highly recommended to improve specificity. See help section" /> </when> - </conditional> - <param name="prpos" type="select" label="include the PRPOS probabilities in INFO tags" display="radio"> - <option value="no" selected="true">No</option> - <option value="yes">Yes</option> - </param> - </inputs> + <when value="no" /> + </conditional> + <param name="removepr" type="boolean" checked="true" truevalue="--removepr" falsevalue="" label="Do not include PRPOS probabilities in INFO tags" help="Use this advanced option to exclude the PRPOS field, which can reduce VCF file size." 
/> + </inputs> <outputs> <data format="vcf" name="vcf_call" label="lumpy-smoove Variant Calling" /> </outputs> <tests> - <test> - + <test expect_num_outputs="1"> + <conditional name="set_plan"> + <param name="plan_choice" value="pair"/> + <param name="normal_bam" value="celegans_RG_1.bam" ftype="bam"/> + <param name="tumor_bam" value="celegans_RG_2.bam" ftype="bam"/> + </conditional> + <param name="reference_source|reference_source_selector" value="history" /> + <param name="reference_source|ref_file" value="chrI-ce11.fa" ftype="fasta"/> + <conditional name="set_exclusion"> + <param name="choices" value="yes"/> + <param name="bedmask" value="exclude.bed" ftype="bed"/> + </conditional> + <param name="removepr" value="true"/> + <output name="vcf_call" file="result_paired.vcf" ftype="vcf" lines_diff="12"/> + </test> + <test expect_num_outputs="1"> <conditional name="set_plan"> <param name="plan_choice" value="cohort"/> <param name="cohort"> @@ -96,156 +101,65 @@ </collection> </param> </conditional> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" value="chrI-ce11.fa"/> - <param name="choices" value="yes"/> - <param name="bedmask" value="exclude.bed"/> - <param name="prpos" value="no"/> - <output name="vcf_call" ftype="vcf" file="result-6.vcf" lines_diff="12"/> - </test> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" value="chrI-ce11.fa"/> - <param name="normal_bam" value="celegans_RG_1.bam"/> - <param name="tumor_bam" value="celegans_RG_2.bam"/> - <param name="choices" value="yes"/> - <param name="bedmask" value="exclude.bed"/> - <param name="prpos" value="no"/> - <output name="vcf_call" ftype="vcf" file="result-1.vcf" lines_diff="12"/> - </test> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" value="chrI-ce11.fa"/> - <param name="normal_bam" value="celegans_RG_1.bam"/> - <param name="tumor_bam" value="celegans_RG_2.bam"/> - <param 
name="choices" value="no"/> - <param name="prpos" value="no"/> - <output name="vcf_call" ftype="vcf" file="result-2.vcf" lines_diff="12"/> + <param name="reference_source|reference_source_selector" value="history" /> + <param name="reference_source|ref_file" value="chrI-ce11.fa" ftype="fasta"/> + <conditional name="set_exclusion"> + <param name="choices" value="no"/> + </conditional> + <param name="removepr" value="false"/> + <output name="vcf_call" file="result_cohort.vcf" ftype="vcf" lines_diff="12"/> </test> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" value="chrI-ce11.fa"/> - <param name="normal_bam" value="celegans_RG_2.bam"/> - <param name="tumor_bam" value="celegans_RG_1.bam"/> - <param name="choices" value="no"/> - <param name="prpos" value="no"/> - <output name="vcf_call" ftype="vcf" file="result-3.vcf" lines_diff="12"/> - </test> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" value="chrI-ce11.fa"/> - <param name="normal_bam" value="celegans_RG_1.bam"/> - <param name="tumor_bam" value="celegans_RG_2.bam"/> - <param name="choices" value="no"/> - <param name="prpos" value="yes"/> - <output name="vcf_call" ftype="vcf" file="result-4.vcf" lines_diff="12"/> - </test> - <test> - <param name="reference_source_selector" value="history" /> - <param name="plan_choice" value="single" /> - <param name="ref_file" value="chrI-ce11.fa"/> - <param name="single_bam" value="celegans_RG_1.bam"/> - <param name="choices" value="no"/> - <param name="prpos" value="no"/> - <output name="vcf_call" ftype="vcf" file="result-5.vcf" lines_diff="12"/> + <test expect_num_outputs="1"> + <conditional name="set_plan"> + <param name="plan_choice" value="single"/> + <param name="single_bam" value="celegans_RG_1.bam" ftype="bam"/> + </conditional> + <param name="reference_source|reference_source_selector" value="history" /> + <param name="reference_source|ref_file" value="chrI-ce11.fa" 
ftype="fasta"/> + <conditional name="set_exclusion"> + <param name="choices" value="no"/> + </conditional> + <param name="removepr" value="false"/> + <output name="vcf_call" file="result_single.vcf" ftype="vcf" lines_diff="12"/> </test> </tests> - <help> -**smoove** simplifies and speeds up SV calling and genotyping for short reads. It also improves -specificity by removing many spurious alignment signals that are indicative of low-level -noise and often contribute to spurious calls. - -There is a blog-post describing smoove in more detail -here: https://brentp.github.io/post/smoove/ + <help><![CDATA[ +**What it does** -Currently, this Galaxy tool only wraps smoove for 1, 2 (bam normal and tumor inputs) or -a small collection of samples (<40), -which translates in the command line:: - - <![CDATA[smoove call --name my-cohort --exclude $bed --fasta $fasta -p $threads --genotype [--removepr] /path/to/*.bam]]> - +**smoove** simplifies and speeds up Structural Variant (SV) calling and genotyping for short reads. It improves specificity by removing many spurious alignment signals that are indicative of low-level noise and often contribute to spurious calls. -the --exclude $bed is highly recommended as it can be used to ignore reads that overlap -problematic regions. +This Galaxy tool wraps `smoove` for single samples, pairs (e.g., normal/tumor), or small cohorts (<40 samples). -A good set of regions for GRCh37 can be found here_ - -.. _here: https://github.com/hall-lab/speedseq/blob/master/annotations/ceph18.b37.lumpy.exclude.2014-01-15.bed - +**Why exclude regions?** -And a good set for GRCh38 can be found there_ +The accuracy of SV detection can be greatly improved by excluding problematic regions of the genome. These regions often have highly repetitive sequences (like centromeres and telomeres) or are naturally highly variable (like immunoglobulin gene regions). Reads aligning to these areas can create ambiguous signals, leading to false-positive SV calls. 
Using a "blacklist" BED file is therefore highly recommended. -.. _there: https://github.com/hall-lab/speedseq/blob/master/annotations/exclude.cnvnator_100bp.GRCh38.20170403.bed - - -smoove will:: +**Recommended Exclusion File** - 1. parallelize calls to lumpy_filter to extract split and discordant reads required by lumpy - - 2. further filter lumpy_filter calls to remove high-coverage, spurious regions and user-specified chroms like 'hs37d5'; - it will also remove reads that we've found are likely spurious signals. after this, it will - remove singleton reads (where the mate was removed by one of the previous filters) - from the discordant bams. This makes lumpy much faster and less memory-hungry. - - 3. calculate per-sample metrics for mean, standard deviation, and distribution of insert - size as required by lumpy. - - 4. stream output of lumpy directly into multiple svtyper processes for parallel-by-region - genotyping while lumpy is still running. - - 5. sort, compress, and index final VCF (but this galaxy wrapper is uncompression the gzip_vcf output) +Although this file was generated a few years ago, it is tied to the genome assembly version. -**Input(s)** +- **GRCh38 / hg38**: `https://github.com/hall-lab/speedseq/blob/master/annotations/exclude.cnvnator_100bp.GRCh38.20170403.bed` + +**Inputs** -* BAM files: Either a pair of Bam files (e.g. normal vs tumor sample) or a single Bam file. - Only BAM alignments produced by BWA-mem have been tested with this tool - - .. class:: warningmark - - It is mandatory for proper run of svtyper that **BAM files contain read group information**, - ie the @RG tag is present and filled in each BAM file - +- **BAM files**: The tool accepts single, paired, or a collection of BAM files. +- **Reference Genome**: You must provide the reference genome that was used for the alignment. +- **Exclusion BED (Optional, but recommended)**: A BED file specifying regions to ignore during the analysis. 
-* A bed file describing the regions to exclude from the analysis -* Additional options*: refer to smoove GitHub repository_ and the lumpy publication (doi 10.1186/gb-2014-15-6-r84) - -.. _repository: https://github.com/brentp/smoove - +.. class:: warningmark -Options:: +It is mandatory for a proper run that **BAM files contain read group information** (i.e., the @RG tag must be present in the BAM header). - <![CDATA[ - - smoove calls several programs. Those with 'Y' are found on your $PATH. Only those with '*' are required. +**How it works** - [Y] bgzip [ sort -> (compress) -> index ] - [Y] gsort [(sort) -> compress -> index ] - [Y] tabix [ sort -> compress -> (index)] - [Y] lumpy - [Y] lumpy_filter - [Y] samtools - [Y] svtyper - [Y] mosdepth [extra filtering of split and discordant files for better scaling] +`smoove` orchestrates a workflow that involves several tools. It intelligently parallelizes `lumpy` to detect breakpoints and `svtyper` to genotype the identified variants, resulting in a final VCF file. - [Y] duphold [(optional) annotate calls with depth changes] - [Y] svtools [only needed for large cohorts]. - - Available sub-commands are below. Each can be run with -h for additional help. 
+**smoove GitHub** - call : call lumpy (and optionally svtyper) - merge : merge and sort (using svtools) calls from multiple samples - genotype : parallelize svtyper on an input VCF - paste : square final calls from multiple samples (each with same number of variants) - plot-counts : plot counts of split, discordant reads before, after smoove filtering - annotate : annotate a VCF with gene and quality of SV call - hipstr : run hipSTR in parallel - cnvnator : run cnvnator in parallel - duphold : run duphold in parallel (this can be done by adding a flag to call or genotype) - ]]> - </help> - +For more detailed information, please refer to the `smoove` GitHub repository: https://github.com/brentp/smoove + ]]></help> <citations> - <citation type="doi">10.1186/gb-2014-15-6-r84</citation> - </citations> + <citation type="doi">10.1186/gb-2014-15-6-r84</citation> + </citations> </tool>
--- a/macro_lumpy_smoove.xml Wed Jan 24 19:26:57 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,35 +0,0 @@ -<macros> - <token name="@pipefail@"><![CDATA[set -o | grep -q pipefail && set -o pipefail;]]></token> - - <token name="@set_fasta_index@"><![CDATA[ - #if str( $reference_source.reference_source_selector ) == "history": - ln -s '${reference_source.ref_file}' reference.fa && - samtools faidx 'reference.fa' 2>&1 || echo "Error running samtools faidx for lumpy_smoove" >&2 && - #else: - ln -s '${reference_source.index.fields.path}' reference.fa && - ln -s '${reference_source.index.fields.path}.fai' reference.fa.fai && - #end if - ]]></token> - - <macro name="reference_source_conditional"> - <conditional name="reference_source"> - <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below"> - <option value="cached">Use a built-in genome index</option> - <option value="history">Use a genome from history and build index</option> - </param> - <when value="cached"> - <param name="index" type="select" label="Using built-in genome" help="Select genome from the list"> - <options from_data_table="fasta_indexes"> - <filter type="sort_by" column="2" /> - <validator type="no_options" message="No indexes are available" /> - </options> - <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> - </param> - </when> - <when value="history"> - <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" - help="You can upload a FASTA sequence to the history and use it as reference" /> - </when> - </conditional> - </macro> -</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Oct 17 17:21:17 2025 +0000 @@ -0,0 +1,57 @@ +<macros> + <token name="@TOOL_VERSION@">0.2.8</token> + <token name="@GALAXY_VERSION@">4</token> + + <!-- Requirements for lumpy_smoove (Python 2.7 environment) --> + <xml name="lumpy_smoove_requirements"> + <requirements> + <requirement type="package" version="0.2.8">smoove</requirement> + <requirement type="package" version="0.7.1">svtyper</requirement> + <requirement type="package" version="2.7">python</requirement> + </requirements> + </xml> + + <!-- Requirements for vcf2hrdetect (Python 3.9 environment) --> + <xml name="vcf2hrdetect_requirements"> + <requirements> + <requirement type="package" version="3.9">python</requirement> + </requirements> + </xml> + + <xml name="stdio"> + <stdio> + <exit_code range="1:" level="fatal" description="Tool exception" /> + </stdio> + </xml> + <token name="@pipefail@"><![CDATA[set -o | grep -q pipefail && set -o pipefail;]]></token> + <token name="@set_fasta_index@"><![CDATA[ + #if str( $reference_source.reference_source_selector ) == "history": + ln -s '${reference_source.ref_file}' reference.fa && + samtools faidx 'reference.fa' 2>&1 || echo "Error running samtools faidx for lumpy_smoove" >&2 && + #else: + ln -s '${reference_source.index.fields.path}' reference.fa && + ln -s '${reference_source.index.fields.path}.fai' reference.fa.fai && + #end if + ]]></token> + <macro name="reference_source_conditional"> + <conditional name="reference_source"> + <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" 
help="Built-ins were indexed using default options."> + <option value="cached">Use a built-in genome index</option> + <option value="history">Use a genome from history and build index</option> + </param> + <when value="cached"> + <param name="index" type="select" label="Using built-in genome" help="Select genome from the list"> + <options from_data_table="fasta_indexes"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No indexes are available" /> + </options> + </param> + </when> + <when value="history"> + <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" /> + </when> + </conditional> + </macro> +</macros> + +
--- a/test-data/result-1.vcf Wed Jan 24 19:26:57 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20240124 -##reference=reference.fa -##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> -##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles"> -##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"> -##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> -##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> -##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants"> -##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants"> -##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants"> -##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants"> -##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends"> -##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend"> -##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants"> -##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples"> -##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples"> -##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples"> -##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples"> 
-##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call"> -##ALT=<ID=DEL,Description="Deletion"> -##ALT=<ID=DUP,Description="Duplication"> -##ALT=<ID=INV,Description="Inversion"> -##ALT=<ID=DUP:TANDEM,Description="Tandem duplication"> -##ALT=<ID=INS,Description="Insertion of novel sequence"> -##ALT=<ID=CNV,Description="Copy number variable region"> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant"> -##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant"> -##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant"> -##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality"> -##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations"> -##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally"> 
-##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)"> -##contig=<ID=chrI_sub,length=100000> -##smoove_version=0.2.8 -##smoove_count_stats=RG1:0,638,0,454 -##smoove_count_stats=RG2:0,466,0,362 -##source=LUMPY -##bcftools_annotateVersion=1.17+htslib-1.17 -##bcftools_annotateCommand=annotate -x INFO/PRPOS,INFO/PREND -Ou; Date=Wed Jan 24 19:27:03 2024 -##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> -##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> -##bcftools_viewVersion=1.17+htslib-1.17 -##bcftools_viewCommand=view -c 1 -Oz -c 1 -o output-smoove.genotyped.vcf.gz; Date=Wed Jan 24 19:27:03 2024 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT RG1 RG2 -chrI_sub 50007 1 N <DUP> 103.66 . SVTYPE=DUP;SVLEN=1148;END=51155;STRANDS=-+:5;IMPRECISE;CIPOS=0,22;CIEND=-30,331;CIPOS95=0,10;CIEND95=-10,82;SU=5;PE=5;SR=0;AC=4;AN=4 GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB 1/1:10:74.81:-9,-3,-2:7:1:5:1:5:0:0:0:1:5:0.83 1/1:2:28.84:-3,-1,-1:5:2:2:2:2:0:0:0:2:2:0.5
--- a/test-data/result-2.vcf Wed Jan 24 19:26:57 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20240124 -##reference=reference.fa -##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> -##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles"> -##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"> -##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> -##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> -##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants"> -##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants"> -##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants"> -##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants"> -##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends"> -##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend"> -##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants"> -##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples"> -##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples"> -##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples"> -##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples"> 
-##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call"> -##ALT=<ID=DEL,Description="Deletion"> -##ALT=<ID=DUP,Description="Duplication"> -##ALT=<ID=INV,Description="Inversion"> -##ALT=<ID=DUP:TANDEM,Description="Tandem duplication"> -##ALT=<ID=INS,Description="Insertion of novel sequence"> -##ALT=<ID=CNV,Description="Copy number variable region"> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant"> -##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant"> -##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant"> -##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality"> -##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations"> -##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally"> 
-##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)"> -##contig=<ID=chrI_sub,length=100000> -##smoove_version=0.2.8 -##smoove_count_stats=RG1:0,638,0,598 -##smoove_count_stats=RG2:0,466,0,448 -##source=LUMPY -##bcftools_annotateVersion=1.17+htslib-1.17 -##bcftools_annotateCommand=annotate -x INFO/PRPOS,INFO/PREND -Ou; Date=Wed Jan 24 19:27:21 2024 -##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> -##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> -##bcftools_viewVersion=1.17+htslib-1.17 -##bcftools_viewCommand=view -c 1 -Oz -c 1 -o output-smoove.genotyped.vcf.gz; Date=Wed Jan 24 19:27:21 2024 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT RG1 RG2 -chrI_sub 50007 1 N <DUP> 103.66 . SVTYPE=DUP;SVLEN=1148;END=51155;STRANDS=-+:5;IMPRECISE;CIPOS=0,22;CIEND=-30,331;CIPOS95=0,10;CIEND95=-10,82;SU=5;PE=5;SR=0;AC=4;AN=4 GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB 1/1:10:74.81:-9,-3,-2:7:1:5:1:5:0:0:0:1:5:0.83 1/1:2:28.84:-3,-1,-1:5:2:2:2:2:0:0:0:2:2:0.5
--- a/test-data/result-3.vcf Wed Jan 24 19:26:57 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20240124 -##reference=reference.fa -##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> -##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles"> -##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"> -##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> -##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> -##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants"> -##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants"> -##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants"> -##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants"> -##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends"> -##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend"> -##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants"> -##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples"> -##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples"> -##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples"> -##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples"> 
-##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call"> -##ALT=<ID=DEL,Description="Deletion"> -##ALT=<ID=DUP,Description="Duplication"> -##ALT=<ID=INV,Description="Inversion"> -##ALT=<ID=DUP:TANDEM,Description="Tandem duplication"> -##ALT=<ID=INS,Description="Insertion of novel sequence"> -##ALT=<ID=CNV,Description="Copy number variable region"> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant"> -##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant"> -##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant"> -##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality"> -##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations"> -##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally"> 
-##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)"> -##contig=<ID=chrI_sub,length=100000> -##smoove_version=0.2.8 -##smoove_count_stats=RG2:0,466,0,448 -##smoove_count_stats=RG1:0,638,0,598 -##source=LUMPY -##bcftools_annotateVersion=1.17+htslib-1.17 -##bcftools_annotateCommand=annotate -x INFO/PRPOS,INFO/PREND -Ou; Date=Wed Jan 24 19:27:39 2024 -##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> -##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> -##bcftools_viewVersion=1.17+htslib-1.17 -##bcftools_viewCommand=view -c 1 -Oz -c 1 -o output-smoove.genotyped.vcf.gz; Date=Wed Jan 24 19:27:39 2024 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT RG2 RG1 -chrI_sub 50007 1 N <DUP> 103.66 . SVTYPE=DUP;SVLEN=1148;END=51155;STRANDS=-+:5;IMPRECISE;CIPOS=0,22;CIEND=-30,331;CIPOS95=0,10;CIEND95=-10,82;SU=5;PE=5;SR=0;AC=4;AN=4 GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB 1/1:2:28.84:-3,-1,-1:5:2:2:2:2:0:0:0:2:2:0.5 1/1:10:74.81:-9,-3,-2:7:1:5:1:5:0:0:0:1:5:0.83
--- a/test-data/result-4.vcf Wed Jan 24 19:26:57 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20240124 -##reference=reference.fa -##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> -##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles"> -##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"> -##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> -##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> -##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants"> -##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants"> -##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants"> -##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants"> -##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends"> -##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend"> -##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants"> -##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples"> -##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples"> -##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples"> -##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples"> 
-##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call"> -##INFO=<ID=PRPOS,Number=.,Type=String,Description="LUMPY probability curve of the POS breakend"> -##INFO=<ID=PREND,Number=.,Type=String,Description="LUMPY probability curve of the END breakend"> -##ALT=<ID=DEL,Description="Deletion"> -##ALT=<ID=DUP,Description="Duplication"> -##ALT=<ID=INV,Description="Inversion"> -##ALT=<ID=DUP:TANDEM,Description="Tandem duplication"> -##ALT=<ID=INS,Description="Insertion of novel sequence"> -##ALT=<ID=CNV,Description="Copy number variable region"> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant"> -##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant"> -##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant"> -##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality"> -##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of 
quality of alternate observations"> -##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)"> -##contig=<ID=chrI_sub,length=100000> -##smoove_version=0.2.8 -##smoove_count_stats=RG1:0,638,0,598 -##smoove_count_stats=RG2:0,466,0,448 -##source=LUMPY -##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> -##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> -##bcftools_viewVersion=1.17+htslib-1.17 -##bcftools_viewCommand=view -O z -c 1 -o output-smoove.genotyped.vcf.gz; Date=Wed Jan 24 19:27:57 2024 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT RG1 RG2 -chrI_sub 50007 1 N <DUP> 103.66 . 
SVTYPE=DUP;SVLEN=1148;END=51155;STRANDS=-+:5;IMPRECISE;CIPOS=0,22;CIEND=-30,331;CIPOS95=0,10;CIEND95=-10,82;SU=5;PE=5;SR=0;PRPOS=0.258039,0.191959,0.142119,0.105597,0.0777833,0.0578014,0.0427846,0.032024,0.023764,0.0176738,0.0130831,0.00982276,0.00731486,0.00542727,0.00399758,0.00297292,0.00221878,0.00165017,0.00122989,0.000915475,0.000674295,0.000501964,0.000370575;PREND=5.85377e-06,7.73803e-06,1.03845e-05,1.38061e-05,1.83274e-05,2.4271e-05,3.24374e-05,4.32758e-05,5.77075e-05,7.68205e-05,0.000102643,0.000137372,0.000184925,0.00024509,0.000329301,0.000440606,0.000583882,0.000773449,0.00103874,0.00137293,0.0018211,0.00244098,0.0032246,0.00433051,0.00577701,0.00771097,0.0100947,0.0134638,0.017428,0.023191,0.0297785,0.0287776,0.0271433,0.0260442,0.0251005,0.0239221,0.0226973,0.022123,0.0207745,0.0198427,0.0190339,0.0181385,0.0179065,0.0174155,0.0168898,0.0165094,0.0157197,0.015398,0.0149506,0.0147207,0.014548,0.0142966,0.0140289,0.0136684,0.0132831,0.013124,0.0126578,0.0124307,0.0121283,0.0114912,0.0113024,0.0110801,0.0107855,0.0107408,0.0105719,0.010252,0.0100385,0.00989579,0.00964551,0.00952156,0.00917511,0.00901622,0.0089231,0.00872881,0.00852142,0.00828831,0.00796058,0.00770706,0.00751548,0.00708522,0.00687959,0.00654007,0.0063474,0.00626172,0.00607514,0.00598551,0.00572162,0.00547532,0.00517031,0.00507122,0.00476895,0.00459012,0.00440836,0.00415379,0.00393712,0.00382996,0.00364121,0.00353575,0.00342231,0.00315652,0.00304452,0.00290783,0.00272193,0.00266152,0.00248906,0.00233477,0.00230558,0.00218745,0.00210921,0.00208486,0.00195023,0.00191344,0.00188952,0.00177463,0.00176537,0.00165498,0.00159982,0.00154295,0.00152936,0.00151581,0.00148667,0.00136872,0.00133499,0.00122388,0.00117244,0.00114374,0.00109766,0.00105654,0.00102599,0.000908013,0.00089917,0.000841279,0.000780186,0.000748484,0.000704024,0.000674257,0.000641784,0.000584102,0.000563501,0.000540792,0.000519593,0.000507932,0.000498128,0.000471617,0.000459362,0.000446368,0.000438032,0.000435269,0.000425232,0.0
00401305,0.00038503,0.000357298,0.000347914,0.000330873,0.000326613,0.000317727,0.00029779,0.000277006,0.000261571,0.000246701,0.00023847,0.000227691,0.000216491,0.000207228,0.000194862,0.000188863,0.000183536,0.000181414,0.000168119,0.000161592,0.000152474,0.000150676,0.00014958,0.000147849,0.00014255,0.000141498,0.000136699,0.000135359,0.000130407,0.000124551,0.000118969,0.000112593,0.000109014,0.000105885,0.000101976,0.000101471,9.81661e-05,9.4903e-05,9.33805e-05,8.92603e-05,8.5718e-05,8.43405e-05,8.43405e-05,7.83631e-05,7.77206e-05,7.24593e-05,7.20819e-05,6.92001e-05,6.88047e-05,6.45869e-05,5.93805e-05,5.39222e-05,5.04172e-05,4.97203e-05,4.78039e-05,4.40517e-05,4.07077e-05,3.76779e-05,3.26187e-05,3.09886e-05,2.94952e-05,2.81091e-05,2.67635e-05,2.55503e-05,2.44152e-05,2.44152e-05,2.31509e-05,2.20697e-05,2.16647e-05,1.9562e-05,1.89572e-05,1.73377e-05,1.68284e-05,1.56743e-05,1.54104e-05,1.44891e-05,1.44455e-05,1.24429e-05,1.21889e-05,1.15144e-05,1.1345e-05,1.06711e-05,1.05578e-05,1.04779e-05,9.85417e-06,8.85111e-06,7.89968e-06,7.4231e-06,6.88815e-06,6.86571e-06,6.76755e-06,6.74529e-06,6.38168e-06,5.57945e-06,5.28579e-06,4.95863e-06,4.82984e-06,4.81336e-06,4.77422e-06,4.66379e-06,4.59303e-06,4.28648e-06,4.25661e-06,4.19686e-06,3.40704e-06,3.37014e-06,2.74502e-06,2.73489e-06,2.53014e-06,2.51133e-06,2.27525e-06,2.24712e-06,1.83448e-06,1.65944e-06,1.65308e-06,1.37572e-06,1.25623e-06,1.13226e-06,1.1278e-06,1.11313e-06,9.94694e-07,8.85533e-07,7.94148e-07,7.25022e-07,6.40991e-07,6.40991e-07,5.66608e-07,5.66608e-07,5.04999e-07,4.97883e-07,4.858e-07,4.858e-07,4.3091e-07,4.10457e-07,4.04316e-07,3.96503e-07,3.90456e-07,3.41062e-07,3.34301e-07,2.87897e-07,2.86605e-07,2.78756e-07,2.71737e-07,2.66751e-07,2.5482e-07,2.2371e-07,2.19872e-07,2.12274e-07,2.12274e-07,2.11258e-07,2.08515e-07,2.07512e-07,2.03502e-07,2.025e-07,2.01497e-07,1.7167e-07,1.70816e-07,1.44698e-07,1.43971e-07,1.02317e-07,1.02317e-07,7.27124e-08,7.0605e-08,5.82493e-08,5.76361e-08,5.76361e-08,5.59409e-08,3.89337e-
08,3.87244e-08,3.79226e-08,3.77165e-08,3.59485e-08,2.87588e-08,2.79782e-08,2.16365e-08,2.16365e-08,2.11459e-08,2.05441e-08,1.99506e-08,1.93652e-08,1.54922e-08,1.54032e-08,1.52251e-08,1.48557e-08,1.45936e-08,1.45062e-08,1.44188e-08,1.43314e-08,1.43314e-08,1.43314e-08,6.94477e-09,6.67767e-09,3.21146e-09,3.14592e-09,3.12614e-09,3.04164e-09,3.04164e-09,3.04164e-09,1.46002e-09,1.40973e-09,1.40046e-09,1.40046e-09,1.36027e-09,1.32049e-09,1.27247e-09,6.36233e-10,6.31905e-10,3.0946e-10,2.9998e-10,2.97867e-10,2.90602e-10,2.90602e-10;AC=4;AN=4 GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB 1/1:10:74.81:-9,-3,-2:7:1:5:1:5:0:0:0:1:5:0.83 1/1:2:28.84:-3,-1,-1:5:2:2:2:2:0:0:0:2:2:0.5
--- a/test-data/result-5.vcf Wed Jan 24 19:26:57 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20240124 -##reference=reference.fa -##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> -##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles"> -##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"> -##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> -##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> -##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants"> -##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants"> -##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants"> -##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants"> -##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends"> -##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend"> -##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants"> -##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples"> -##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples"> -##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples"> -##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples"> 
-##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call"> -##ALT=<ID=DEL,Description="Deletion"> -##ALT=<ID=DUP,Description="Duplication"> -##ALT=<ID=INV,Description="Inversion"> -##ALT=<ID=DUP:TANDEM,Description="Tandem duplication"> -##ALT=<ID=INS,Description="Insertion of novel sequence"> -##ALT=<ID=CNV,Description="Copy number variable region"> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant"> -##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant"> -##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant"> -##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality"> -##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations"> -##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally"> 
-##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)"> -##contig=<ID=chrI_sub,length=100000> -##smoove_version=0.2.8 -##smoove_count_stats=RG1:0,638,0,598 -##source=LUMPY -##bcftools_annotateVersion=1.17+htslib-1.17 -##bcftools_annotateCommand=annotate -x INFO/PRPOS,INFO/PREND -Ou; Date=Wed Jan 24 19:28:14 2024 -##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> -##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> -##bcftools_viewVersion=1.17+htslib-1.17 -##bcftools_viewCommand=view -c 1 -Oz -c 1 -o output-smoove.genotyped.vcf.gz; Date=Wed Jan 24 19:28:14 2024 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT RG1 -chrI_sub 50000 1 N <DUP> 71.54 . SVTYPE=DUP;SVLEN=981;END=50981;STRANDS=-+:4;IMPRECISE;CIPOS=-646,29;CIEND=-30,505;CIPOS95=-164,8;CIEND95=-9,133;SU=4;PE=4;SR=0;AC=2;AN=2 GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB 1/1:8:71.54:-8,-2,-1:9:3:5:3:5:0:0:0:3:5:0.62
--- a/test-data/result-6.vcf Wed Jan 24 19:26:57 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER=<ID=PASS,Description="All filters passed"> -##fileDate=20240124 -##reference=reference.fa -##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> -##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles"> -##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"> -##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> -##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> -##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants"> -##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants"> -##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants"> -##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants"> -##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends"> -##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend"> -##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants"> -##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples"> -##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples"> -##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples"> -##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples"> 
-##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call"> -##ALT=<ID=DEL,Description="Deletion"> -##ALT=<ID=DUP,Description="Duplication"> -##ALT=<ID=INV,Description="Inversion"> -##ALT=<ID=DUP:TANDEM,Description="Tandem duplication"> -##ALT=<ID=INS,Description="Insertion of novel sequence"> -##ALT=<ID=CNV,Description="Copy number variable region"> -##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> -##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant"> -##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant"> -##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant"> -##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant"> -##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality"> -##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample"> -##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> -##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth"> -##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally"> -##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations"> -##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations"> -##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally"> 
-##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally"> -##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)"> -##contig=<ID=chrI_sub,length=100000> -##smoove_version=0.2.8 -##smoove_count_stats=RG1:0,638,0,454 -##smoove_count_stats=RG2:0,466,0,362 -##source=LUMPY -##bcftools_annotateVersion=1.17+htslib-1.17 -##bcftools_annotateCommand=annotate -x INFO/PRPOS,INFO/PREND -Ou; Date=Wed Jan 24 19:26:45 2024 -##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> -##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> -##bcftools_viewVersion=1.17+htslib-1.17 -##bcftools_viewCommand=view -c 1 -Oz -c 1 -o output-smoove.genotyped.vcf.gz; Date=Wed Jan 24 19:26:45 2024 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT RG1 RG2 -chrI_sub 50007 1 N <DUP> 103.66 . SVTYPE=DUP;SVLEN=1148;END=51155;STRANDS=-+:5;IMPRECISE;CIPOS=0,22;CIEND=-30,331;CIPOS95=0,10;CIEND95=-10,82;SU=5;PE=5;SR=0;AC=4;AN=4 GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB 1/1:10:74.81:-9,-3,-2:7:1:5:1:5:0:0:0:1:5:0.83 1/1:2:28.84:-3,-1,-1:5:2:2:2:2:0:0:0:2:2:0.5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/result_cohort.vcf Fri Oct 17 17:21:17 2025 +0000 @@ -0,0 +1,59 @@ +##fileformat=VCFv4.2 +##FILTER=<ID=PASS,Description="All filters passed"> +##fileDate=20251017 +##reference=reference.fa +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"> +##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> +##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants"> +##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants"> +##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants"> +##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants"> +##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends"> +##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend"> +##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants"> +##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples"> +##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples"> +##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples"> +##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples"> 
+##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call"> +##INFO=<ID=PRPOS,Number=.,Type=String,Description="LUMPY probability curve of the POS breakend"> +##INFO=<ID=PREND,Number=.,Type=String,Description="LUMPY probability curve of the END breakend"> +##ALT=<ID=DEL,Description="Deletion"> +##ALT=<ID=DUP,Description="Duplication"> +##ALT=<ID=INV,Description="Inversion"> +##ALT=<ID=DUP:TANDEM,Description="Tandem duplication"> +##ALT=<ID=INS,Description="Insertion of novel sequence"> +##ALT=<ID=CNV,Description="Copy number variable region"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant"> +##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant"> +##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant"> +##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality"> +##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth"> +##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of 
quality of alternate observations"> +##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)"> +##contig=<ID=chrI_sub,length=100000> +##smoove_version=0.2.8 +##smoove_count_stats=RG1:0,638,0,598 +##smoove_count_stats=RG2:0,466,0,448 +##source=LUMPY +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##bcftools_viewVersion=1.17+htslib-1.17 +##bcftools_viewCommand=view -O z -c 1 -o output-smoove.genotyped.vcf.gz; Date=Fri Oct 17 17:45:07 2025 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT RG1 RG2 +chrI_sub 50007 1 N <DUP> 103.66 . 
SVTYPE=DUP;SVLEN=1148;END=51155;STRANDS=-+:5;IMPRECISE;CIPOS=0,22;CIEND=-30,331;CIPOS95=0,10;CIEND95=-10,82;SU=5;PE=5;SR=0;PRPOS=0.258039,0.191959,0.142119,0.105597,0.0777833,0.0578014,0.0427846,0.032024,0.023764,0.0176738,0.0130831,0.00982276,0.00731486,0.00542727,0.00399758,0.00297292,0.00221878,0.00165017,0.00122989,0.000915475,0.000674295,0.000501964,0.000370575;PREND=5.85377e-06,7.73803e-06,1.03845e-05,1.38061e-05,1.83274e-05,2.4271e-05,3.24374e-05,4.32758e-05,5.77075e-05,7.68205e-05,0.000102643,0.000137372,0.000184925,0.00024509,0.000329301,0.000440606,0.000583882,0.000773449,0.00103874,0.00137293,0.0018211,0.00244098,0.0032246,0.00433051,0.00577701,0.00771097,0.0100947,0.0134638,0.017428,0.023191,0.0297785,0.0287776,0.0271433,0.0260442,0.0251005,0.0239221,0.0226973,0.022123,0.0207745,0.0198427,0.0190339,0.0181385,0.0179065,0.0174155,0.0168898,0.0165094,0.0157197,0.015398,0.0149506,0.0147207,0.014548,0.0142966,0.0140289,0.0136684,0.0132831,0.013124,0.0126578,0.0124307,0.0121283,0.0114912,0.0113024,0.0110801,0.0107855,0.0107408,0.0105719,0.010252,0.0100385,0.00989579,0.00964551,0.00952156,0.00917511,0.00901622,0.0089231,0.00872881,0.00852142,0.00828831,0.00796058,0.00770706,0.00751548,0.00708522,0.00687959,0.00654007,0.0063474,0.00626172,0.00607514,0.00598551,0.00572162,0.00547532,0.00517031,0.00507122,0.00476895,0.00459012,0.00440836,0.00415379,0.00393712,0.00382996,0.00364121,0.00353575,0.00342231,0.00315652,0.00304452,0.00290783,0.00272193,0.00266152,0.00248906,0.00233477,0.00230558,0.00218745,0.00210921,0.00208486,0.00195023,0.00191344,0.00188952,0.00177463,0.00176537,0.00165498,0.00159982,0.00154295,0.00152936,0.00151581,0.00148667,0.00136872,0.00133499,0.00122388,0.00117244,0.00114374,0.00109766,0.00105654,0.00102599,0.000908013,0.00089917,0.000841279,0.000780186,0.000748484,0.000704024,0.000674257,0.000641784,0.000584102,0.000563501,0.000540792,0.000519593,0.000507932,0.000498128,0.000471617,0.000459362,0.000446368,0.000438032,0.000435269,0.000425232,0.0
00401305,0.00038503,0.000357298,0.000347914,0.000330873,0.000326613,0.000317727,0.00029779,0.000277006,0.000261571,0.000246701,0.00023847,0.000227691,0.000216491,0.000207228,0.000194862,0.000188863,0.000183536,0.000181414,0.000168119,0.000161592,0.000152474,0.000150676,0.00014958,0.000147849,0.00014255,0.000141498,0.000136699,0.000135359,0.000130407,0.000124551,0.000118969,0.000112593,0.000109014,0.000105885,0.000101976,0.000101471,9.81661e-05,9.4903e-05,9.33805e-05,8.92603e-05,8.5718e-05,8.43405e-05,8.43405e-05,7.83631e-05,7.77206e-05,7.24593e-05,7.20819e-05,6.92001e-05,6.88047e-05,6.45869e-05,5.93805e-05,5.39222e-05,5.04172e-05,4.97203e-05,4.78039e-05,4.40517e-05,4.07077e-05,3.76779e-05,3.26187e-05,3.09886e-05,2.94952e-05,2.81091e-05,2.67635e-05,2.55503e-05,2.44152e-05,2.44152e-05,2.31509e-05,2.20697e-05,2.16647e-05,1.9562e-05,1.89572e-05,1.73377e-05,1.68284e-05,1.56743e-05,1.54104e-05,1.44891e-05,1.44455e-05,1.24429e-05,1.21889e-05,1.15144e-05,1.1345e-05,1.06711e-05,1.05578e-05,1.04779e-05,9.85417e-06,8.85111e-06,7.89968e-06,7.4231e-06,6.88815e-06,6.86571e-06,6.76755e-06,6.74529e-06,6.38168e-06,5.57945e-06,5.28579e-06,4.95863e-06,4.82984e-06,4.81336e-06,4.77422e-06,4.66379e-06,4.59303e-06,4.28648e-06,4.25661e-06,4.19686e-06,3.40704e-06,3.37014e-06,2.74502e-06,2.73489e-06,2.53014e-06,2.51133e-06,2.27525e-06,2.24712e-06,1.83448e-06,1.65944e-06,1.65308e-06,1.37572e-06,1.25623e-06,1.13226e-06,1.1278e-06,1.11313e-06,9.94694e-07,8.85533e-07,7.94148e-07,7.25022e-07,6.40991e-07,6.40991e-07,5.66608e-07,5.66608e-07,5.04999e-07,4.97883e-07,4.858e-07,4.858e-07,4.3091e-07,4.10457e-07,4.04316e-07,3.96503e-07,3.90456e-07,3.41062e-07,3.34301e-07,2.87897e-07,2.86605e-07,2.78756e-07,2.71737e-07,2.66751e-07,2.5482e-07,2.2371e-07,2.19872e-07,2.12274e-07,2.12274e-07,2.11258e-07,2.08515e-07,2.07512e-07,2.03502e-07,2.025e-07,2.01497e-07,1.7167e-07,1.70816e-07,1.44698e-07,1.43971e-07,1.02317e-07,1.02317e-07,7.27124e-08,7.0605e-08,5.82493e-08,5.76361e-08,5.76361e-08,5.59409e-08,3.89337e-
08,3.87244e-08,3.79226e-08,3.77165e-08,3.59485e-08,2.87588e-08,2.79782e-08,2.16365e-08,2.16365e-08,2.11459e-08,2.05441e-08,1.99506e-08,1.93652e-08,1.54922e-08,1.54032e-08,1.52251e-08,1.48557e-08,1.45936e-08,1.45062e-08,1.44188e-08,1.43314e-08,1.43314e-08,1.43314e-08,6.94477e-09,6.67767e-09,3.21146e-09,3.14592e-09,3.12614e-09,3.04164e-09,3.04164e-09,3.04164e-09,1.46002e-09,1.40973e-09,1.40046e-09,1.40046e-09,1.36027e-09,1.32049e-09,1.27247e-09,6.36233e-10,6.31905e-10,3.0946e-10,2.9998e-10,2.97867e-10,2.90602e-10,2.90602e-10;AC=4;AN=4 GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB 1/1:10:74.81:-9,-3,-2:7:1:5:1:5:0:0:0:1:5:0.83 1/1:2:28.84:-3,-1,-1:5:2:2:2:2:0:0:0:2:2:0.5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/result_paired.vcf Fri Oct 17 17:21:17 2025 +0000 @@ -0,0 +1,59 @@ +##fileformat=VCFv4.2 +##FILTER=<ID=PASS,Description="All filters passed"> +##fileDate=20251017 +##reference=reference.fa +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"> +##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> +##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants"> +##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants"> +##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants"> +##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants"> +##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends"> +##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend"> +##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants"> +##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples"> +##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples"> +##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples"> +##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples"> 
+##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call"> +##ALT=<ID=DEL,Description="Deletion"> +##ALT=<ID=DUP,Description="Duplication"> +##ALT=<ID=INV,Description="Inversion"> +##ALT=<ID=DUP:TANDEM,Description="Tandem duplication"> +##ALT=<ID=INS,Description="Insertion of novel sequence"> +##ALT=<ID=CNV,Description="Copy number variable region"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant"> +##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant"> +##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant"> +##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality"> +##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth"> +##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations"> +##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally"> 
+##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)"> +##contig=<ID=chrI_sub,length=100000> +##smoove_version=0.2.8 +##smoove_count_stats=RG1:0,638,0,454 +##smoove_count_stats=RG2:0,466,0,362 +##source=LUMPY +##bcftools_annotateVersion=1.17+htslib-1.17 +##bcftools_annotateCommand=annotate -x INFO/PRPOS,INFO/PREND -Ou; Date=Fri Oct 17 17:44:56 2025 +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##bcftools_viewVersion=1.17+htslib-1.17 +##bcftools_viewCommand=view -c 1 -Oz -c 1 -o output-smoove.genotyped.vcf.gz; Date=Fri Oct 17 17:44:56 2025 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT RG1 RG2 +chrI_sub 50007 1 N <DUP> 103.66 . SVTYPE=DUP;SVLEN=1148;END=51155;STRANDS=-+:5;IMPRECISE;CIPOS=0,22;CIEND=-30,331;CIPOS95=0,10;CIEND95=-10,82;SU=5;PE=5;SR=0;AC=4;AN=4 GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB 1/1:10:74.81:-9,-3,-2:7:1:5:1:5:0:0:0:1:5:0.83 1/1:2:28.84:-3,-1,-1:5:2:2:2:2:0:0:0:2:2:0.5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/result_single.vcf Fri Oct 17 17:21:17 2025 +0000 @@ -0,0 +1,58 @@ +##fileformat=VCFv4.2 +##FILTER=<ID=PASS,Description="All filters passed"> +##fileDate=20251017 +##reference=reference.fa +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"> +##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> +##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants"> +##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants"> +##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants"> +##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants"> +##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends"> +##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend"> +##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants"> +##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples"> +##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples"> +##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples"> +##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples"> 
+##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call"> +##INFO=<ID=PRPOS,Number=.,Type=String,Description="LUMPY probability curve of the POS breakend"> +##INFO=<ID=PREND,Number=.,Type=String,Description="LUMPY probability curve of the END breakend"> +##ALT=<ID=DEL,Description="Deletion"> +##ALT=<ID=DUP,Description="Duplication"> +##ALT=<ID=INV,Description="Inversion"> +##ALT=<ID=DUP:TANDEM,Description="Tandem duplication"> +##ALT=<ID=INS,Description="Insertion of novel sequence"> +##ALT=<ID=CNV,Description="Copy number variable region"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant"> +##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant"> +##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant"> +##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality"> +##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth"> +##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of 
quality of alternate observations"> +##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)"> +##contig=<ID=chrI_sub,length=100000> +##smoove_version=0.2.8 +##smoove_count_stats=RG1:0,638,0,598 +##source=LUMPY +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##bcftools_viewVersion=1.17+htslib-1.17 +##bcftools_viewCommand=view -O z -c 1 -o output-smoove.genotyped.vcf.gz; Date=Fri Oct 17 17:45:18 2025 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT RG1 +chrI_sub 50000 1 N <DUP> 71.54 . 
SVTYPE=DUP;SVLEN=981;END=50981;STRANDS=-+:4;IMPRECISE;CIPOS=-646,29;CIEND=-30,505;CIPOS95=-164,8;CIEND95=-9,133;SU=4;PE=4;SR=0;PRPOS=6.63313e-13,6.63313e-13,9.28639e-13,3.71455e-12,3.71455e-12,3.71455e-12,4.2452e-12,4.2452e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,8.49041e-12,1.32663e-11,1.32663e-11,2.65325e-11,2.98491e-11,2.98491e-11,2.98491e-11,2.98491e-11,2.98491e-11,2.98491e-11,2.98491e-11,2.98491e-11,2.98491e-11,2.98491e-11,5.85042e-11,5.85042e-11,5.85042e-11,6.50047e-11,8.49041e-11,8.49041e-11,8.49041e-11,1.16743e-10,1.16743e-10,1.16743e-10,1.16743e-10,1.16743e-10,1.27356e-10,1.27356e-10,1.37969e-10,1.37969e-10,1.37969e-10,1.37969e-10,1.37969e-10,1.48582e-10,2.08015e-10,2.08015e-10,2.63269e-10,3.19684e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.65353e-10,3.86844e-10,4.77586e-10,4.77586e-10,4.77586e-10,4.77586e-10,5.77878e-10,5.77878e-10,6.09983e-10,7.22348e-10,7.58466e-10,9.02637e-10,9.02637e-10,1.05934e-09,1.05934e-09,1.05934e-09,1.05934e-09,1.10979e-09,1.28709e-09,1.28709e-09,1.28709e-09,1.3456e-09,2.20452e-09,2.20452e-09,2.20452e-09,2.39622e-09,2.63584e-09,2.63584e-09,2.63584e-09,2.63584e-09,2.63584e-09,2.87546e-09,2.87546e-09,3.49234e-09,3.49234e-09,3.63204e-09,3.77173e-09,3.77173e-09,4.2123e-09,4.36274e-09,5.02858e-09,5.57183e-09,7.45929e-09,7.45929e-09,7.95657e-09,8.20522e-09,8.20522e-09,8.45386e-09,8.45386e-09,9.27816e-09,9.27816e-09,9.82393e-09,9.82393e-09,1.13689e-08,1.13689e-08,1.13689e-08,1.13689e-08,1.34321e-08,1.34321e-08,1.34321e-08,1.41783e-08,1.49245e-08,1.61061e-08,1.65414e-08,1.65414e-08,1.69767e-08,1.7412e-08,1.88328e-08,2.03093e-08,2.12764e-08,2.28817e-08,2.45452e-08,2.62672e-08,2.74612e-08,2.74612e-08,2.74612e-08,3.20258e-08,3.70203e-08,3.79233e-08,4.02565e-08,4.02565e-08,4.02565e-08,4.1215e-08,4.7281e-08,4.83555e-08,4.83555e-08,4.83555e-08,5.14073e-08,5.4545e-08,5.4
545e-08,5.65652e-08,5.98319e-08,6.18951e-08,6.31846e-08,6.81057e-08,7.47103e-08,8.12235e-08,8.82283e-08,9.28108e-08,9.56232e-08,9.56232e-08,9.56232e-08,9.56232e-08,1.03158e-07,1.03158e-07,1.03158e-07,1.03158e-07,1.05069e-07,1.12395e-07,1.14402e-07,1.22157e-07,1.22157e-07,1.26369e-07,1.28475e-07,1.38407e-07,1.56156e-07,1.63334e-07,1.70252e-07,1.74617e-07,1.85271e-07,1.99275e-07,1.99275e-07,1.99275e-07,2.17015e-07,2.20071e-07,2.29337e-07,2.38793e-07,2.58278e-07,2.75015e-07,2.82655e-07,2.89549e-07,2.97374e-07,2.97374e-07,2.97374e-07,3.04455e-07,3.23402e-07,3.35104e-07,3.35104e-07,3.35104e-07,3.64585e-07,3.91068e-07,4.10224e-07,4.40476e-07,4.49847e-07,4.87137e-07,5.26094e-07,5.36831e-07,6.28698e-07,6.47988e-07,6.68396e-07,6.75822e-07,7.04434e-07,7.55902e-07,7.80286e-07,7.88414e-07,8.75731e-07,9.19142e-07,9.28426e-07,9.46995e-07,9.64532e-07,9.91697e-07,1.06626e-06,1.0955e-06,1.15552e-06,1.20686e-06,1.26203e-06,1.27371e-06,1.34862e-06,1.38434e-06,1.41875e-06,1.43153e-06,1.54371e-06,1.63101e-06,1.68683e-06,1.83001e-06,1.92736e-06,2.03583e-06,2.13276e-06,2.13276e-06,2.23194e-06,2.28238e-06,2.34578e-06,2.3982e-06,2.51744e-06,2.57247e-06,2.76434e-06,2.84595e-06,2.89338e-06,3.07861e-06,3.14112e-06,3.32116e-06,3.40742e-06,3.51855e-06,3.61953e-06,3.82682e-06,3.82682e-06,3.97829e-06,4.3079e-06,4.42232e-06,4.64309e-06,4.73138e-06,4.8178e-06,4.95949e-06,5.14289e-06,5.23818e-06,5.54465e-06,5.78622e-06,5.93058e-06,6.35481e-06,6.46518e-06,6.59449e-06,6.68869e-06,6.8494e-06,6.99209e-06,7.25209e-06,7.25209e-06,7.46728e-06,7.91281e-06,8.22498e-06,8.57942e-06,8.83103e-06,8.91132e-06,9.34443e-06,9.42786e-06,9.54497e-06,1.0001e-05,1.01503e-05,1.03626e-05,1.0708e-05,1.07721e-05,1.10062e-05,1.12436e-05,1.14173e-05,1.14844e-05,1.14844e-05,1.17286e-05,1.20455e-05,1.23309e-05,1.28821e-05,1.3366e-05,1.38719e-05,1.40722e-05,1.43823e-05,1.49633e-05,1.53878e-05,1.64742e-05,1.7479e-05,1.76711e-05,1.8827e-05,1.94225e-05,2.01862e-05,2.03427e-05,2.11326e-05,2.11326e-05,2.13976e-05,2.1561e-05,2.2619e-05,
2.33086e-05,2.39458e-05,2.48369e-05,2.53281e-05,2.59478e-05,2.65228e-05,2.7221e-05,2.77425e-05,2.87972e-05,2.98736e-05,3.05204e-05,3.20095e-05,3.25818e-05,3.33799e-05,3.39248e-05,3.48474e-05,3.53405e-05,3.59666e-05,3.6784e-05,3.75874e-05,3.92024e-05,4.0241e-05,4.20038e-05,4.43531e-05,4.52941e-05,4.66503e-05,4.73292e-05,4.80349e-05,4.87269e-05,4.87269e-05,4.90119e-05,5.11599e-05,5.24451e-05,5.34086e-05,5.47146e-05,5.55009e-05,5.69811e-05,5.84684e-05,5.93665e-05,6.02015e-05,6.14814e-05,6.40561e-05,6.6295e-05,6.85631e-05,6.98671e-05,7.051e-05,7.17372e-05,7.47684e-05,7.68994e-05,7.71907e-05,7.77733e-05,8.10807e-05,8.48277e-05,8.70145e-05,8.93699e-05,9.4566e-05,9.72675e-05,0.00010155,0.000104033,0.000105817,0.000108912,0.000110757,0.000112617,0.000115108,0.000117426,0.000120796,0.000124217,0.000127035,0.000133958,0.000135284,0.00013771,0.000141465,0.000143421,0.000149375,0.000152392,0.000158742,0.000161697,0.000165596,0.000170667,0.000175111,0.000177145,0.000180153,0.000183445,0.000187346,0.000191523,0.000197398,0.000203528,0.000208594,0.000218837,0.000223945,0.000229382,0.000233277,0.000237934,0.000245812,0.000251642,0.000263897,0.000269275,0.000272132,0.000280268,0.000283994,0.000288584,0.000297088,0.000306919,0.000318725,0.00032372,0.000330515,0.000334746,0.000339348,0.000348665,0.000356201,0.000364227,0.000372055,0.000377991,0.000390787,0.000398994,0.000414013,0.000420426,0.000427645,0.00044589,0.000453498,0.000463816,0.000479121,0.00048951,0.000501275,0.000510841,0.00051379,0.00052265,0.00053549,0.000541968,0.000544993,0.000560474,0.000573072,0.000579449,0.000599811,0.000611191,0.000632963,0.000652784,0.000674123,0.000695846,0.00070158,0.000703662,0.000726154,0.000739201,0.000763046,0.000779934,0.000797922,0.000816787,0.000835967,0.000851666,0.00086839,0.00089212,0.00090997,0.000930854,0.000951251,0.000962474,0.000977879,0.00100665,0.00102443,0.00103429,0.00106096,0.00106526,0.00108546,0.00110102,0.00112637,0.00113921,0.00118109,0.00121094,0.00124954,0.00128687,0.00
130263,0.00134446,0.00135993,0.00137618,0.00140981,0.00144243,0.00149414,0.00154698,0.00156333,0.00158274,0.00160802,0.00164882,0.00167303,0.0017254,0.001761,0.0017684,0.00182601,0.00186751,0.00188394,0.0019363,0.00196713,0.00200835,0.00203755,0.00206133,0.00211151,0.00213421,0.00220223,0.00224795,0.00226556,0.00231049,0.00238568,0.00244674,0.00246971,0.00251416,0.00253874,0.00255714,0.00261604,0.0026918,0.00271102,0.00276336,0.00279437,0.00285041,0.00289274,0.00294048,0.0029948,0.00306635,0.0031464,0.00319082,0.0032411,0.00331793,0.00337316,0.00342132,0.00347983,0.00354716,0.00356508,0.00361396,0.00370163,0.00373695,0.00374977,0.0037958,0.00384122,0.00389989,0.00401315,0.00411437,0.00419071,0.00425638,0.00431481,0.00435195,0.0044396,0.00452008,0.00457854,0.00462369,0.00470661,0.00480849,0.00487013,0.00494259,0.0050318,0.00506453,0.00514111,0.00521677,0.00528543,0.00536058,0.00546628,0.00556218,0.00569322,0.00578077,0.00584297,0.00592578,0.00605669,0.00608542,0.00612454,0.00617244,0.00623092,0.00634898,0.00647577,0.00663329,0.0067718,0.00685291,0.00695353,0.00702677,0.00713294,0.00729075,0.00737829,0.00747472,0.00761973,0.00771011,0.00779947,0.00787921,0.00800693,0.00801747,0.00820048,0.00824719,0.00828323,0.00842485,0.008535,0.00871719,0.00888922,0.0090021,0.00911408,0.00915106,0.00926424,0.00938023,0.00943336,0.00949674,0.00960008,0.00968815,0.00980632,0.00992631,0.0101043,0.0102097,0.0104223,0.0105457,0.0105742,0.0107447,0.0108014,0.0110009,0.0111191,0.0112494,0.0113544,0.0115402,0.0116588,0.0117334,0.0118262,0.0119305,0.012018,0.0121724,0.012295,0.0125323,0.0127161,0.012828,0.0130123,0.0131287,0.0133421,0.0135318,0.00997979,0.00742702,0.00552616,0.00412186,0.00306302,0.00227049,0.00167883,0.0012489,0.000924642,0.000687025,0.000506065,0.000376061,0.00027836,0.000208351,0.000154611,0.000114988,8.51197e-05,6.39078e-05,4.75912e-05,3.53103e-05,2.60086e-05,1.93421e-05,1.44356e-05,1.07361e-05,8.00181e-06,5.95616e-06,4.38703e-06,3.26583e-06,2.41099e-06;PREND=3.03189e-06
,4.08075e-06,5.42715e-06,7.2752e-06,9.78941e-06,1.31464e-05,1.76564e-05,2.36589e-05,3.17685e-05,4.27446e-05,5.71532e-05,7.68831e-05,0.000103212,0.000136802,0.000184779,0.000246446,0.000330777,0.000443821,0.000589039,0.000780171,0.00103289,0.0013799,0.001855,0.00248779,0.00335196,0.00440533,0.00584237,0.0077623,0.0102619,0.0138515,0.0184346,0.0180755,0.0178232,0.0174946,0.0171445,0.0169641,0.0165645,0.0162885,0.0160958,0.0158644,0.0156359,0.0154264,0.0151264,0.01483,0.014592,0.014239,0.0139718,0.0137623,0.0134515,0.0131013,0.0128354,0.012673,0.0124121,0.0123026,0.0120969,0.0119128,0.0116482,0.0114645,0.0110884,0.0108412,0.0105261,0.0103996,0.0102712,0.0100908,0.00985027,0.00964829,0.0094525,0.00930827,0.00914226,0.00896594,0.00882723,0.00860671,0.00847046,0.00835048,0.00814898,0.0079474,0.00783296,0.00762806,0.00749484,0.00739587,0.00720925,0.00711508,0.00696391,0.00675945,0.00666688,0.00653664,0.00642077,0.00632301,0.00620215,0.00613777,0.00602424,0.00592835,0.00579545,0.00566521,0.00559181,0.00549276,0.00546682,0.00538658,0.00521263,0.00507089,0.00487481,0.00475571,0.00469295,0.00458368,0.00452003,0.00439439,0.0042222,0.00411819,0.00401878,0.00394121,0.0038585,0.00376369,0.00367385,0.00358379,0.00350216,0.00344247,0.00336339,0.00330029,0.00322828,0.00316982,0.00307046,0.00303561,0.00293575,0.0028841,0.00285284,0.00277647,0.00273949,0.00266295,0.00255831,0.00251499,0.00246623,0.00243464,0.00241937,0.00235306,0.00232614,0.00225741,0.00218091,0.0021302,0.00205022,0.00201654,0.00194522,0.00187635,0.00182548,0.0017735,0.00172878,0.00169442,0.00162852,0.00157855,0.00153443,0.00149397,0.00147055,0.00144682,0.00141845,0.00136882,0.00131586,0.00130082,0.00127695,0.00126218,0.00123721,0.00121279,0.00118854,0.00116046,0.00113674,0.00111913,0.00108491,0.00107559,0.00106259,0.00105148,0.00103479,0.00101689,0.000972695,0.000948257,0.000924493,0.000900991,0.000889668,0.000865146,0.000854104,0.00083534,0.000815753,0.000794719,0.000781337,0.000766838,0.000752243,0.000736664,0.00072
4082,0.000712894,0.000705973,0.00068831,0.000680327,0.000669642,0.000652806,0.000636146,0.00062849,0.000611093,0.000596294,0.000587972,0.000571393,0.0005645,0.000553982,0.000543961,0.000523867,0.000513997,0.000489447,0.00047912,0.00045258,0.000438637,0.000414931,0.000399876,0.000387653,0.000370544,0.000352613,0.000345222,0.00032418,0.000312893,0.000301045,0.000289058,0.00028623,0.000278805,0.000270802,0.000265514,0.00025398,0.000249552,0.000242674,0.000239684,0.000237239,0.000233503,0.000229487,0.000223591,0.000218308,0.00021671,0.000209341,0.000205909,0.000201536,0.000191555,0.000189009,0.000186479,0.000182105,0.000181642,0.000178786,0.000174499,0.0001717,0.000169812,0.00016606,0.000164195,0.000158742,0.000155993,0.000154637,0.00015152,0.000148165,0.000144351,0.000139104,0.000134674,0.000131545,0.000125267,0.000122249,0.000116809,0.000113561,0.000112219,0.000109435,0.000107821,0.000103422,9.89701e-05,9.3618e-05,9.21414e-05,8.77116e-05,8.47192e-05,8.16515e-05,7.70723e-05,7.37032e-05,7.19537e-05,6.85302e-05,6.67847e-05,6.47584e-05,5.97291e-05,5.80279e-05,5.57261e-05,5.23545e-05,5.13809e-05,4.82288e-05,4.52392e-05,4.46737e-05,4.25417e-05,4.10962e-05,4.06973e-05,3.83544e-05,3.76308e-05,3.71604e-05,3.50322e-05,3.4915e-05,3.28554e-05,3.18809e-05,3.08061e-05,3.05929e-05,3.03797e-05,2.98526e-05,2.76428e-05,2.70656e-05,2.4861e-05,2.3955e-05,2.35059e-05,2.26474e-05,2.17991e-05,2.13365e-05,1.90338e-05,1.89619e-05,1.77768e-05,1.65525e-05,1.5912e-05,1.50583e-05,1.45103e-05,1.38969e-05,1.27266e-05,1.23033e-05,1.18815e-05,1.14397e-05,1.1183e-05,1.09902e-05,1.04272e-05,1.01992e-05,9.93164e-06,9.80852e-06,9.76748e-06,9.58319e-06,9.04396e-06,8.69585e-06,8.12193e-06,7.92576e-06,7.53756e-06,7.44051e-06,7.25383e-06,6.81345e-06,6.35175e-06,6.03739e-06,5.70671e-06,5.52847e-06,5.29026e-06,5.05241e-06,4.86868e-06,4.58842e-06,4.46719e-06,4.36083e-06,4.33986e-06,4.02183e-06,3.89227e-06,3.67266e-06,3.65448e-06,3.6363e-06,3.6363e-06,3.51422e-06,3.49647e-06,3.39388e-06,3.37656e-06,3.29215e-06,3
.17483e-06,3.04735e-06,2.89107e-06,2.81294e-06,2.73891e-06,2.65086e-06,2.65086e-06,2.57091e-06,2.49167e-06,2.4517e-06,2.36123e-06,2.28479e-06,2.25954e-06,2.25954e-06,2.11017e-06,2.09825e-06,1.98168e-06,1.98168e-06,1.91246e-06,1.90154e-06,1.78968e-06,1.65854e-06,1.5101e-06,1.41572e-06,1.40744e-06,1.36053e-06,1.26748e-06,1.17448e-06,1.09307e-06,9.51557e-07,9.04001e-07,8.65244e-07,8.26892e-07,7.89521e-07,7.62297e-07,7.30506e-07,7.30506e-07,6.94657e-07,6.6794e-07,6.59485e-07,5.98947e-07,5.87279e-07,5.40297e-07,5.25985e-07,4.92848e-07,4.86003e-07,4.58325e-07,4.58325e-07,3.99618e-07,3.93868e-07,3.7322e-07,3.67732e-07,3.49119e-07,3.46494e-07,3.43869e-07,3.27494e-07,2.96977e-07,2.65904e-07,2.50666e-07,2.35632e-07,2.35632e-07,2.33791e-07,2.33791e-07,2.22658e-07,1.96628e-07,1.86279e-07,1.7593e-07,1.73115e-07,1.73115e-07,1.71708e-07,1.68893e-07,1.67486e-07,1.56852e-07,1.56852e-07,1.56852e-07,1.29172e-07,1.29172e-07,1.06377e-07,1.06377e-07,9.87785e-08,9.87785e-08,9.01683e-08,8.93909e-08,7.29759e-08,6.67715e-08,6.67715e-08,5.5999e-08,5.15348e-08,4.68146e-08,4.68146e-08,4.6389e-08,4.17849e-08,3.74993e-08,3.37656e-08,3.09518e-08,2.75888e-08,2.75888e-08,2.45888e-08,2.45888e-08,2.19152e-08,2.1696e-08,2.12577e-08,2.12577e-08,1.89347e-08,1.8343e-08,1.81458e-08,1.79485e-08,1.77513e-08,1.57789e-08,1.56036e-08,1.35582e-08,1.35582e-08,1.32465e-08,1.30907e-08,1.30907e-08,1.26231e-08,1.1082e-08,1.09435e-08,1.06665e-08,1.06665e-08,1.06665e-08,1.05279e-08,1.05279e-08,1.05279e-08,1.05279e-08,1.05279e-08,8.96953e-09,8.96953e-09,7.63622e-09,7.63622e-09,5.45444e-09,5.45444e-09,3.89603e-09,3.84192e-09,3.21964e-09,3.21964e-09,3.21964e-09,3.12494e-09,2.19828e-09,2.19828e-09,2.16446e-09,2.16446e-09,2.063e-09,1.6504e-09,1.62335e-09,1.25539e-09,1.25539e-09,1.23374e-09,1.2121e-09,1.19045e-09,1.16881e-09,9.35048e-10,9.35048e-10,9.35048e-10,9.17732e-10,9.17732e-10,9.17732e-10,9.17732e-10,9.17732e-10,9.17732e-10,9.17732e-10,4.50208e-10,4.32892e-10,2.12117e-10,2.07788e-10,2.07788e-10,2.03459e-10,2.03459e-10
,2.03459e-10,9.95653e-11,9.74008e-11,9.74008e-11,9.74008e-11,9.52363e-11,9.30719e-11,9.09074e-11,4.54537e-11,4.54537e-11,2.27269e-11,2.21857e-11,2.21857e-11,2.16446e-11,2.16446e-11;AC=2;AN=2 GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB 1/1:8:71.54:-8,-2,-1:9:3:5:3:5:0:0:0:3:5:0.62
--- a/test-data/test_vcf2hrdetect.tab Wed Jan 24 19:26:57 2024 +0000 +++ b/test-data/test_vcf2hrdetect.tab Fri Oct 17 17:21:17 2025 +0000 @@ -1,180 +1,263 @@ chr1 pos1 chr2 pos2 type -10 132635667 10 132636051 INV -18 77309929 18 77312095 DEL -10 132988685 10 132991206 DEL -10 132858454 10 132858819 DEL -5 26796704 5 26801897 DEL -5 147553039 5 147554616 INV -20 3821170 20 3825120 DEL -6 26625483 7 68758443 TRA -6 161336425 7 85956546 TRA +1 14436660 1 14438563 DUP +1 28515659 19 24027918 TRA 1 42552553 4 44842163 TRA -4 73831211 4 73831818 DEL -8 59616000 16 3354097 TRA -8 30145409 17 7167951 TRA -3 158034493 6 32688592 TRA -8 140475500 12 58359059 TRA -11 99185461 11 99186696 DEL -16 33485944 16 33486280 DEL +1 43694066 1 43695538 DEL +1 44059282 1 44059890 INV +1 44059890 1 44059282 INV +1 64897140 6 159204434 TRA +1 66804423 13 59377693 TRA +1 80221789 1 80223032 DEL +1 80794627 1 80795137 INV +1 80795137 1 80794627 INV +1 91151631 8 85437297 TRA +1 92232065 1 92233362 DEL +1 105428633 7 93357413 TRA +1 119401195 15 51189306 TRA +1 168024578 19 24033135 TRA +1 221827800 13 61315891 TRA +1 234805693 16 59323740 TRA +2 12019022 7 52399206 TRA +2 18118870 2 18120031 DEL +2 19002880 2 19003387 DEL +2 34797601 2 34801579 DEL +2 34965732 2 34967261 DEL +2 48990532 2 48991191 DEL +2 76773562 2 76775444 DEL +2 77358259 18 59713754 TRA +2 78091959 4 174044216 TRA +2 120417073 2 120418454 DUP +2 125766561 2 125768399 DUP +2 153997618 17 74377266 TRA +2 156774338 2 156776256 DEL +2 160095376 2 160095824 INV +2 160095824 2 160095376 INV +2 178351450 19 52567370 TRA +2 178351470 19 52567638 TRA +2 179315148 2 179315689 DEL +2 189119618 2 189120979 DEL +2 204273805 4 68944782 TRA 3 12818457 5 144122990 TRA +3 32203783 3 32204153 DEL +3 38399708 8 75876952 TRA +3 48372309 7 32225987 TRA +3 54428366 12 28226427 TRA +3 75960769 11 130675918 TRA 3 105828859 5 141379626 TRA -8 96265720 19 18835613 TRA -6 32315438 6 32316258 DEL -10 55893654 10 55894125 DEL -7 97993595 7 97993987 
DEL -5 53122773 5 53125484 INV -6 65022877 6 65023371 DUP -19 15046341 19 15049471 DEL -1 105428633 7 93357413 TRA -1 91151631 8 85437297 TRA -13 38071947 13 38085574 DEL -1 64897140 6 159204434 TRA -2 156774338 2 156776256 DEL -14 23105069 14 23107950 DEL -21 32956225 X 47183861 TRA -8 16821879 8 16822259 DEL -8 140475502 11 56303340 TRA 3 124936235 3 124937163 DEL -7 98015910 X 136092975 TRA -7 127930431 16 76865795 TRA -11 106143090 11 106143794 INV -6 37294925 6 37295284 DEL -2 179315148 2 179315689 DEL +3 129763389 3 129806742 DEL +3 144517821 3 144518457 DEL +3 151748833 3 176444683 INV +3 158034493 6 32688592 TRA +3 176444683 3 151748833 INV +3 182298978 11 94948981 TRA +3 182299013 9 1940734 TRA +3 184156702 20 231409 TRA +3 190785116 3 190789319 DEL +3 192376521 10 65545580 TRA +4 44842163 1 42552553 TRA +4 68944782 2 204273805 TRA +4 73831211 4 73831818 DEL +4 81136688 6 68923562 TRA +4 88268261 4 88274334 DEL +4 99513602 6 32589570 TRA +4 102787795 4 102788358 DEL +4 144573271 4 144573612 DEL +4 151011309 4 151011648 DUP 4 162642267 X 3599701 TRA 4 162642407 X 3599706 TRA -4 144573271 4 144573612 DEL -7 86615393 7 86615759 DEL -2 48990532 2 48991191 DEL -X 114216686 X 114544524 INV -2 160095376 2 160095824 INV -17 46615627 17 46617222 DUP -17 45634774 17 45635284 DEL -20 62949719 20 62959202 DEL -2 18118870 2 18120031 DEL -10 59257660 10 59257985 INV -9 127885505 18 71711520 TRA -10 51019958 12 46175132 TRA -10 59256942 10 59257951 INV -10 68673027 16 59976476 TRA -7 100056291 7 129157154 DUP +4 174044216 2 78091959 TRA +4 175534267 10 123120353 TRA +4 183256660 8 131547788 TRA +4 186382139 9 129005120 TRA +5 1178285 5 1180725 DEL +5 5595231 15 52589534 TRA +5 26796704 5 26801897 DEL 5 37709852 7 8663298 TRA -3 129763389 3 129806742 DEL +5 53122606 5 53125511 INV +5 53122773 5 53125484 INV +5 53125484 5 53122773 INV +5 53125511 5 53122606 INV 5 54865791 10 75489275 TRA 5 54865801 10 75489000 TRA -3 144517821 3 144518457 DEL +5 95966521 5 95967361 DEL 5 
103062313 5 103062635 DEL +5 108595090 5 108601127 DEL +5 127336004 5 127336923 DEL +5 141379626 3 105828859 TRA +5 144122990 3 12818457 TRA +5 147553039 5 147554616 INV +5 147554616 5 147553039 INV +5 157223490 5 176652060 INV +5 157223548 11 3760878 TRA +5 169597700 5 169598748 INV +5 169598748 5 169597700 INV +5 176651964 11 3760899 TRA +5 176652060 5 157223490 INV +5 178109055 5 178113407 DEL +6 17608998 X 117432517 TRA +6 20169115 6 108322150 DUP +6 26625483 7 68758443 TRA +6 32315438 6 32316258 DEL +6 32589570 4 99513602 TRA +6 32688592 3 158034493 TRA +6 37294925 6 37295284 DEL +6 37295025 12 22085955 TRA +6 65022877 6 65023371 DUP +6 68923562 4 81136688 TRA +6 72863847 6 72873539 DEL +6 74156485 14 71480241 TRA 6 74932726 6 74933069 DEL -2 189119618 2 189120979 DEL -16 13294470 16 13296490 DEL -6 72863847 6 72873539 DEL -4 81136688 6 68923562 TRA -20 61724700 20 61725614 DEL -4 99513602 6 32589570 TRA +6 89921735 6 89922171 DEL +6 107307808 8 105231718 TRA 6 136589486 6 136590574 DEL -17 70815221 17 70821355 DEL -17 68159938 17 68160278 DEL -10 35593223 10 35593548 DEL -4 88268261 4 88274334 DEL -10 45158118 10 45158533 DEL -14 90299534 14 90302487 INV -9 141058509 10 107209 TRA -3 151748833 3 176444683 INV -1 92232065 1 92233362 DEL -22 19099927 22 19100621 DEL -22 17770355 22 17779150 DEL -3 32203783 3 32204153 DEL -13 35787786 X 11952984 TRA -14 106484224 15 22486809 TRA -2 19002880 2 19003387 DEL -X 55702382 X 55709885 DEL -4 102787795 4 102788358 DEL -12 30478280 12 30480988 DEL -15 75867566 16 89281701 TRA -4 186382139 9 129005120 TRA -4 183256660 8 131547788 TRA -2 12019022 7 52399206 TRA +6 159204434 1 64897140 TRA +6 161336425 7 85956546 TRA +7 8663298 5 37709852 TRA +7 32225987 3 48372309 TRA +7 52399206 2 12019022 TRA +7 65857652 7 65860106 DEL +7 68758443 6 26625483 TRA +7 70420972 7 70438890 INV +7 70438890 7 70420972 INV 7 76134084 7 76140082 DEL -4 175534267 10 123120353 TRA -15 62706095 15 62707781 DEL -14 47118175 14 63226299 INV -12 
12026417 12 12027146 DEL -14 60341404 14 60343237 DEL -14 61058056 14 61059493 DEL -6 17608998 X 117432517 TRA -5 108595090 5 108601127 DEL -11 93695221 11 93702091 DEL -2 34965732 2 34967261 DEL -2 34797601 2 34801579 DEL -6 74156485 14 71480241 TRA +7 85956546 6 161336425 TRA +7 86615393 7 86615759 DEL +7 93357413 1 105428633 TRA +7 93541820 7 93542546 DEL +7 97993595 7 97993987 DEL +7 98015910 X 136092975 TRA +7 100056291 7 129157154 DUP +7 111053752 12 108203254 TRA +7 127930431 16 76865795 TRA 7 151230203 7 151231867 DEL 7 156387047 7 156387226 INV -1 14436660 1 14438563 DUP -5 127336004 5 127336923 DEL -5 157223490 5 176652060 INV -5 1178285 5 1180725 DEL -3 190785116 3 190789319 DEL -11 97172221 11 97172970 DEL -14 80106294 14 80115045 DEL -14 65842539 14 65843134 INV -16 69854786 16 69859026 DUP -15 39744398 15 39744849 DEL -7 65857652 7 65860106 DEL -1 43694066 1 43695538 DEL -1 44059282 1 44059890 INV -9 104714849 9 104724476 DEL +7 156387226 7 156387047 INV +8 11245570 8 11247212 DEL +8 14588058 8 14588822 DEL +8 16821879 8 16822259 DEL +8 30145409 17 7167951 TRA +8 59616000 16 3354097 TRA +8 75876952 3 38399708 TRA +8 85437297 1 91151631 TRA +8 96265720 19 18835613 TRA +8 105231718 6 107307808 TRA +8 118980771 8 118981151 DEL +8 131547788 4 183256660 TRA +8 140475500 12 58359059 TRA +8 140475502 11 56303340 TRA +9 1940734 3 182299013 TRA 9 104182136 9 104182429 DUP -7 70420972 7 70438890 INV -6 89921735 6 89922171 DEL -3 182299013 9 1940734 TRA -5 157223548 11 3760878 TRA -3 192376521 10 65545580 TRA -3 48372309 7 32225987 TRA -3 38399708 8 75876952 TRA -5 95966521 5 95967361 DEL -6 107307808 8 105231718 TRA -3 182298978 11 94948981 TRA -3 75960769 11 130675918 TRA -6 37295025 12 22085955 TRA -4 151011309 4 151011648 DUP -6 20169115 6 108322150 DUP +9 104714849 9 104724476 DEL +9 110033442 9 110035532 DEL +9 127885505 18 71711520 TRA +9 129005120 4 186382139 TRA 9 139427511 9 139427797 DEL +9 141058509 10 107209 TRA +10 107209 9 141058509 TRA +10 
35593223 10 35593548 DEL +10 45158118 10 45158533 DEL +10 51019958 12 46175132 TRA +10 55893654 10 55894125 DEL +10 59256942 10 59257951 INV +10 59257660 10 59257985 INV +10 59257951 10 59256942 INV +10 59257985 10 59257660 INV +10 65545580 3 192376521 TRA +10 68673027 16 59976476 TRA +10 75489000 5 54865801 TRA +10 75489275 5 54865791 TRA +10 87242583 10 87243046 DUP +10 123120353 4 175534267 TRA +10 132635667 10 132636051 INV +10 132636051 10 132635667 INV +10 132858454 10 132858819 DEL +10 132988685 10 132991206 DEL +11 3760878 5 157223548 TRA +11 3760899 5 176651964 TRA +11 56303340 8 140475502 TRA +11 93695221 11 93702091 DEL +11 94948981 3 182298978 TRA +11 97172221 11 97172970 DEL +11 99185461 11 99186696 DEL +11 106143090 11 106143794 INV +11 106143794 11 106143090 INV +11 130675918 3 75960769 TRA +12 12026417 12 12027146 DEL +12 22085955 6 37295025 TRA +12 28226427 3 54428366 TRA +12 30478280 12 30480988 DEL +12 46175132 10 51019958 TRA +12 58359059 8 140475500 TRA +12 104359630 12 125801144 INV +12 108203254 7 111053752 TRA +12 125801144 12 104359630 INV +13 35787786 X 11952984 TRA +13 38071947 13 38085574 DEL +13 59377693 1 66804423 TRA +13 61315891 1 221827800 TRA 13 74915300 13 74916746 DEL -1 168024578 19 24033135 TRA -2 125766561 2 125768399 DUP -2 120417073 2 120418454 DUP +13 89421953 13 89422932 DEL +14 23105069 14 23107950 DEL 14 32953296 14 32954345 DEL -7 93541820 7 93542546 DEL -8 11245570 8 11247212 DEL -7 111053752 12 108203254 TRA -1 234805693 16 59323740 TRA -1 80794627 1 80795137 INV -9 110033442 9 110035532 DEL -1 80221789 1 80223032 DEL -1 66804423 13 59377693 TRA -1 221827800 13 61315891 TRA -1 119401195 15 51189306 TRA +14 41829108 14 41980350 DEL +14 42486317 14 42486634 DEL +14 47118175 14 63226299 INV +14 60341404 14 60343237 DEL +14 61058056 14 61059493 DEL +14 63226299 14 47118175 INV +14 65842539 14 65843134 INV +14 65843134 14 65842539 INV +14 71480241 6 74156485 TRA +14 80106294 14 80115045 DEL +14 90299534 14 90302487 INV +14 
90302487 14 90299534 INV +14 106484224 15 22486809 TRA +15 22486809 14 106484224 TRA +15 39744398 15 39744849 DEL +15 51189306 1 119401195 TRA +15 52589534 5 5595231 TRA +15 62706095 15 62707781 DEL +15 75867566 16 89281701 TRA +16 3354097 8 59616000 TRA +16 13294470 16 13296490 DEL +16 33485944 16 33486280 DEL +16 59323740 1 234805693 TRA +16 59976476 10 68673027 TRA +16 69854786 16 69859026 DUP +16 76865795 7 127930431 TRA 16 85302496 16 85304389 DEL -8 118980771 8 118981151 DEL -3 54428366 12 28226427 TRA +16 89281701 15 75867566 TRA +17 7167951 8 30145409 TRA +17 45634774 17 45635284 DEL +17 46615627 17 46617222 DUP +17 68159938 17 68160278 DEL +17 70815221 17 70821355 DEL +17 74377266 2 153997618 TRA 18 39585688 18 39586313 DEL -3 184156702 20 231409 TRA -13 89421953 13 89422932 DEL -5 169597700 5 169598748 INV -2 76773562 2 76775444 DEL -5 178109055 5 178113407 DEL -10 87242583 10 87243046 DUP -8 14588058 8 14588822 DEL +18 59713754 2 77358259 TRA +18 71711520 9 127885505 TRA +18 77309929 18 77312095 DEL +19 15046341 19 15049471 DEL +19 18835613 8 96265720 TRA +19 24027918 1 28515659 TRA +19 24033135 1 168024578 TRA 19 51077891 19 51082521 DEL -2 78091959 4 174044216 TRA -2 204273805 4 68944782 TRA -5 176651964 11 3760899 TRA -5 5595231 15 52589534 TRA -1 28515659 19 24027918 TRA -2 153997618 17 74377266 TRA -12 104359630 12 125801144 INV -2 178351470 19 52567638 TRA -2 178351450 19 52567370 TRA -2 77358259 18 59713754 TRA -14 42486317 14 42486634 DEL -14 41829108 14 41980350 DEL -5 53122606 5 53125511 INV +19 52567370 2 178351450 TRA +19 52567638 2 178351470 TRA +20 231409 3 184156702 TRA +20 3821170 20 3825120 DEL +20 61724700 20 61725614 DEL +20 62949719 20 62959202 DEL +21 32956225 X 47183861 TRA +22 17770355 22 17779150 DEL +22 19099927 22 19100621 DEL +X 3599701 4 162642267 TRA +X 3599706 4 162642407 TRA +X 11952984 13 35787786 TRA +X 47183861 21 32956225 TRA +X 55702382 X 55709885 DEL +X 114216686 X 114544524 INV +X 114544524 X 114216686 INV +X 
117432517 6 17608998 TRA +X 136092975 7 98015910 TRA
#!/usr/bin/env python
"""Convert a lumpy-smoove VCF into the 5-column table used by HRDetect.

The output is tab-separated with the header ``chr1 pos1 chr2 pos2 type``.
Breakend (``SVTYPE=BND``) records are reported as ``INV`` when both ends lie
on the same chromosome and as ``TRA`` otherwise; every other record keeps its
own ``SVTYPE`` and takes its second position from the ``END`` INFO key.
"""
import argparse
import re
import sys

# Compiled once: the same expressions are applied to every VCF record.
# INFO keys are anchored on the start of the field or on the ';' separator so
# that "END=" can never match inside a longer key such as "CIEND=".
_SVTYPE_RE = re.compile(r'(?:^|;)SVTYPE=([^;]+)')
_END_RE = re.compile(r'(?:^|;)END=([^;]+)')

# Mate locus of a breakend ALT: ]chr:pos] or [chr:pos[.
# The contig part accepts anything except brackets, and the position is the
# digits after the LAST colon, so contig names that themselves contain ':'
# are still parsed instead of being silently skipped.
_BREAKEND_RE = re.compile(
    r"\](?P<chrom1>[^\[\]]+):(?P<pos1>\d+)\]|"
    r"\[(?P<chrom2>[^\[\]]+):(?P<pos2>\d+)\["
)


def create_arg_parser():
    """Creates and returns the argument parser.

    Returns:
        argparse.ArgumentParser: Parser with two positional arguments,
        ``vcf_file`` and ``output_file``.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Convert a VCF file from lumpy-smoove to a tabular format "
            "compatible with the HRDetect pipeline."
        )
    )
    parser.add_argument(
        'vcf_file',
        help='Path to the input VCF file.'
    )
    parser.add_argument(
        'output_file',
        help='Path to the output tabular file.'
    )
    return parser


def parse_breakend_alt(alt_field):
    """
    Parses the ALT field for a breakend and returns chromosome and position.

    Args:
        alt_field (str): The ALT field (column 5) of a VCF line.

    Returns:
        tuple: A tuple containing (chromosome, position), both as strings,
               or (None, None) if the field holds no ]chr:pos] / [chr:pos[
               pattern.
    """
    match = _BREAKEND_RE.search(alt_field)
    if not match:
        return None, None

    groups = match.groupdict()
    # Exactly one alternative of the pattern matched; take whichever is set.
    chrom = groups['chrom1'] or groups['chrom2']
    pos = groups['pos1'] or groups['pos2']
    return chrom, pos


def _convert_record(line):
    """Converts one VCF line to [chr1, pos1, chr2, pos2, type], or None."""
    if line.startswith('#'):
        return None  # header / meta-information line

    fields = line.strip().split('\t')
    if len(fields) < 8:
        return None  # blank or truncated line: no INFO column to read

    chrom1 = fields[0]
    pos1 = fields[1]
    info = fields[7]

    svtype_match = _SVTYPE_RE.search(info)
    if not svtype_match:
        return None  # not a structural-variant record
    svtype = svtype_match.group(1)

    if svtype == "BND":  # Breakend (INV or TRA)
        chrom2, pos2 = parse_breakend_alt(fields[4])
        if not (chrom2 and pos2):
            return None
        event_type = "INV" if chrom1 == chrom2 else "TRA"
        return [chrom1, pos1, chrom2, pos2, event_type]

    # Other SV types (DEL, DUP, etc.): both ends are on the same chromosome.
    end_match = _END_RE.search(info)
    if not end_match:
        return None
    return [chrom1, pos1, chrom1, end_match.group(1), svtype]


def process_vcf(vcf_path, output_path):
    """
    Reads a VCF file, converts it, and writes the result to a tabular file.

    Records that cannot be converted (header lines, lines with fewer than
    8 columns, records without SVTYPE, breakends with an unparsable ALT,
    non-breakend records without END) are skipped.

    Args:
        vcf_path (str): Path to the input VCF file.
        output_path (str): Path to the output tabular file.

    Raises:
        SystemExit: With status 1 after printing a message to stderr when
            a file cannot be opened, read or written.
    """
    header = ["chr1", "pos1", "chr2", "pos2", "type"]
    try:
        with open(vcf_path, 'r') as infile, open(output_path, 'w') as outfile:
            outfile.write("\t".join(header) + "\n")
            for line in infile:
                row = _convert_record(line)
                if row is not None:
                    outfile.write("\t".join(row) + "\n")

    except FileNotFoundError as e:
        # Either path may be the missing one (e.g. the output directory does
        # not exist), so report the file the OS actually complained about.
        missing = e.filename if e.filename is not None else vcf_path
        print(f"Error: File '{missing}' not found.",
              file=sys.stderr)
        sys.exit(1)
    except IOError as e:
        print(f"IO Error: {e}", file=sys.stderr)
        sys.exit(1)


def main():
    """Main function of the script: parse the CLI and run the conversion."""
    parser = create_arg_parser()
    args = parser.parse_args()
    process_vcf(args.vcf_file, args.output_file)


if __name__ == '__main__':
    main()
<tool id="vcf2hrdetect" name="Convert VCF for HRDetect" version="4">
    <description>from lumpy-smoove output to a 5-column tabular format</description>

    <!-- Shared definitions are imported from macros.xml; the two macros
         expanded below and the @pipefail@ token used in the command are
         presumably defined there (that file is not shown here). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="vcf2hrdetect_requirements" />
    <expand macro="stdio" />

    <!-- Runs the converter script shipped next to this wrapper:
         first argument is the input VCF, second is the tabular output. -->
    <command detect_errors="exit_code"><![CDATA[
        @pipefail@
        python '$__tool_directory__'/vcf2hrdetect.py '$lumpy_smoove_vcf' '$hrdetect_tabular'
    ]]></command>

    <inputs>
        <param name="lumpy_smoove_vcf"
               type="data"
               format="vcf"
               label="Input lumpy-smoove VCF"
               help="VCF file containing structural variants detected by lumpy-smoove."/>
    </inputs>

    <outputs>
        <data name="hrdetect_tabular"
              format="tabular"
              label="HRDetect formatted structural variants" />
    </outputs>

    <tests>
        <!-- One input VCF, one expected table; sort="true" is set on the
             output comparison. -->
        <test expect_num_outputs="1">
            <param name="lumpy_smoove_vcf" value="test_vcf2hrdetect.vcf" ftype="vcf" />
            <output name="hrdetect_tabular"
                    file="test_vcf2hrdetect.tab"
                    sort="true"
                    ftype="tabular" />
        </test>
    </tests>

    <help><![CDATA[
**What it does**

This tool converts a VCF file produced by **lumpy-smoove** into the 5-column tabular format (chr1, pos1, chr2, pos2, type) required by the `HRDetect`_ pipeline.

.. _HRDetect: https://github.com/eyzhao/hrdetect-pipeline/
    ]]></help>

    <citations>
        <citation type="doi">10.1038/nm.4292</citation>
    </citations>
</tool>