Mercurial > repos > iuc > sniffles
changeset 0:93c4b04a0769 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc commit f5977355970ef4994957147d2d8a96fe6605e2b4"
author | iuc |
---|---|
date | Mon, 14 Sep 2020 07:39:07 +0000 |
parents | |
children | 3f6f028f418f |
files | sniffles.xml test-data/expected_outcome3.vcf test-data/expected_outcome4.vcf test-data/expected_outcome5.vcf test-data/expected_outcome6.vcf test-data/expected_output.vcf test-data/expected_output2.vcf test-data/reads_region.bam |
diffstat | 8 files changed, 1018 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sniffles.xml Mon Sep 14 07:39:07 2020 +0000 @@ -0,0 +1,285 @@ +<tool id="sniffles" name="sniffles" version="@TOOL_VERSION@+galaxy0"> + <description>Structural variation caller using third generation sequencing</description> + <macros> + <token name="@TOOL_VERSION@">1.0.12</token> + </macros> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">sniffles</requirement> + </requirements> + <version_command> + <![CDATA[ sniffles --version ]]> + </version_command> + <command detect_errors="exit_code"> + <![CDATA[ +## Link the BAM and its index into the working directory as input.bam / input.bam.bai. +ln -f -s '${input}' input.bam && +ln -f -s '${input.metadata.bam_index}' input.bam.bai && + +sniffles +-t \${GALAXY_SLOTS:-2} +-m 'input.bam' +-v '$output' +## Every optional numeric parameter is emitted only when its field holds a value. +## Comparing str(...) with '' keeps a legitimate 0 and skips a blank field, so a +## blank field never produces a flag without an argument. +## general_options +#if str($general_options.min_support) != '': + --min_support $general_options.min_support +#end if +#if str($general_options.max_num_splits) != '': + --max_num_splits $general_options.max_num_splits +#end if +#if str($general_options.max_distance) != '': + --max_distance $general_options.max_distance +#end if +#if str($general_options.min_length) != '': + --min_length $general_options.min_length +#end if +#if str($general_options.minmapping_qual) != '': + --minmapping_qual $general_options.minmapping_qual +#end if +#if str($general_options.num_reads_report) != '': + --num_reads_report $general_options.num_reads_report +#end if +#if str($general_options.min_seq_size) != '': + --min_seq_size $general_options.min_seq_size +#end if +#if str($general_options.min_zmw) != '': + --min_zmw $general_options.min_zmw +#end if + $general_options.cs_string +## clustering_options + $clustering_options.cluster +#if str($clustering_options.cluster_support) != '': + --cluster_support $clustering_options.cluster_support +#end if +#if str($clustering_options.allelefreq) != '': + --allelefreq $clustering_options.allelefreq +#end if +#if str($clustering_options.min_homo_af) != '': + --min_homo_af $clustering_options.min_homo_af +#end if +#if str($clustering_options.min_het_af) != '': + --min_het_af $clustering_options.min_het_af +#end if +##advanced_options + $advanced_options.report_BND + $advanced_options.not_report_seq + $advanced_options.ignore_sd + $advanced_options.ccs_reads +## parameter_estimation_options + $parameter_estimation_options.skip_parameter_estimation +#if str($parameter_estimation_options.del_ratio) != '': + --del_ratio $parameter_estimation_options.del_ratio +#end if +#if str($parameter_estimation_options.ins_ratio) != '': + --ins_ratio $parameter_estimation_options.ins_ratio +#end if +#if str($parameter_estimation_options.max_diff_per_window) != '': + 
--max_diff_per_window $parameter_estimation_options.max_diff_per_window +#end if +#if str($parameter_estimation_options.max_dist_aln_events) != '': + --max_dist_aln_events $parameter_estimation_options.max_dist_aln_events +#end if + ]]> + </command> + <inputs> + <!-- All tuning parameters are optional; a blank numeric field leaves the corresponding flag off the command line. --> + <param type="data" name="input" format="bam" label="Input BAM file"/> + <section name="general_options" title="Set general options" expanded="False"> + <param argument="--min_support" type="integer" value="10" optional="true" min="1" label="Minimum Support" help="Minimum number of reads that support a SV. [10]" /> + <param argument="--max_num_splits" type="integer" value="7" optional="true" min="0" label="Maximum Number of Splits" help="Maximum number of splits per read to be still taken into account. [7]" /> + <param argument="--max_distance" type="integer" value="1000" optional="true" min="10" label="Maximum Distance" help="Maximum distance to group SV together. [1000]" /> + <param argument="--min_length" type="integer" value="30" optional="true" min="2" label="Minimum Length" help="Minimum length of SV to be reported. [30]"/> + <param argument="--minmapping_qual" type="integer" value="20" optional="true" min="0" label="Minimum Mapping Quality" help="Minimum Mapping Quality. [20]"/> + <param argument="--num_reads_report" type="integer" value="0" optional="true" min="-1" label="Number of reads to report" help="Report up to N reads that support the SV in the vcf file. -1: report all. [0]"/> + <param argument="--min_seq_size" type="integer" value="" optional="true" label="Minimum Seq Size" help="Discard read if none of its segments is larger than this. [2000]"/> + <param argument="--min_zmw" type="integer" value="0" optional="true" min="0" label="Minimum ZMW" help="Discard SV that are not supported by at least x zmws. This applies only for PacBio recognizable reads. [0]"/> + <param argument="--cs_string" type="boolean" truevalue="--cs_string" falsevalue="" optional="true" label="Enable CS String" help="Enables the scan of CS string instead of Cigar and MD. 
[false]"/> + </section> + <section name="clustering_options" title="Clustering/phasing and genotyping options" expanded="False"> + <param argument="--cluster" type="boolean" truevalue="--cluster" falsevalue="" optional="true" label="Cluster" help="Enables Sniffles to phase SVs that occur on the same reads [false]"/> + <param argument="--cluster_support" type="integer" value="1" optional="true" min="1" label="Cluster Support" help="Minimum number of reads supporting clustering of SV. [1]"/> + <param argument="--allelefreq" type="float" value="0" optional="true" min="0" label="Allele Frequency Threshold" help="Filters the SV calls based on the allele frequency. [0]" /> + <param argument="--min_homo_af" type="float" value="0.8" optional="true" min="0" max="1" label="Minimum Homogenous Allele Frequency" help="Minimum homogeneous threshold on allele frequency (0-1). [0.8]"/> + <param argument="--min_het_af" type="float" value="0.3" optional="true" min="0" max="1" label="Minimum Heterogeneous Allele Frequency" help="Minimum heterogeneous threshold on allele frequency (0-1). [0.3]"/> + </section> + <section name="advanced_options" title="Advanced options" expanded="False"> + <param argument="--report_BND" type="boolean" value="True" truevalue="--report_BND" falsevalue="" optional="true" label="Report BND" help="Report BND instead of Tra in vcf output. [true]" /> + <param argument="--not_report_seq" type="boolean" value="False" truevalue="--not_report_seq" falsevalue="" optional="true" label="Don't report seq" help="Don't report sequences for indels in vcf output. (Beta version!) [false]"/> + <param argument="--ignore_sd" type="boolean" value="False" truevalue="--ignore_sd" falsevalue="" optional="true" label="Ignore sd" help="Ignores the sd based filtering. [false]"/> + <param argument="--ccs_reads" type="boolean" value="False" truevalue="--ccs_reads" falsevalue="" optional="true" label="CCS Reads" help="Preset CCS Pacbio setting. 
(Beta) [false]" /> + </section> + <section name="parameter_estimation_options" title="Parameter Estimation Options" expanded="False"> + <param argument="--skip_parameter_estimation" type="boolean" value="False" truevalue="--skip_parameter_estimation" falsevalue="" optional="true" label="Skip Parameter Estimation" help="Enables the scan if only very few reads are present. [false]"/> + <param argument="--del_ratio" type="float" value="0.0458369" optional="true" min="0" max="1" label="Estimated Deletion Ratio" help="Estimated ratio of deletions per read (0-1). [0.0458369]" /> + <param argument="--ins_ratio" type="float" value="0.049379" optional="true" min="0" max="1" label="Estimated Insertion Ratio" help="Estimated ratio of insertions per read (0-1). [0.049379]" /> + <param argument="--max_diff_per_window" type="integer" value="50" optional="true" min="0" label="Maximum Differences Per Window" help="Maximum differences per 100bp. [50]"/> + <param argument="--max_dist_aln_events" type="integer" value="4" optional="true" min="0" label="Maximum Distance Between Alignment Events" help="Maximum distance between alignment (indel) events. 
[4]"/> + </section> + </inputs> + <outputs> + <data name="output" format="vcf" label="${tool.name} on ${on_string}"/> + </outputs> + <tests> + <!-- Each test compares the VCF output with lines_diff="2"; presumably this tolerates the run-dependent ##fileDate header line (TODO confirm). --> + <test> <!-- test 1 - standard run --> + <param name="input" value="reads_region.bam"/> + <output name="output" file="expected_output.vcf" lines_diff="2"/> + </test> + <test> <!-- test 2 - add reads into report --> + <param name="input" value="reads_region.bam"/> + <param name="num_reads_report" value="-1"/> + <output name="output" file="expected_output2.vcf" lines_diff="2"/> + </test> + <test> <!-- test 3 - use cs_string --> + <param name="input" value="reads_region.bam"/> + <param name="cs_string" value="true"/> + <output name="output" file="expected_outcome3.vcf" lines_diff="2"/> + </test> + <test> <!-- test 4 - clustering --> + <param name="input" value="reads_region.bam"/> + <param name="cluster" value="True"/> + <output name="output" file="expected_outcome4.vcf" lines_diff="2"/> + </test> + <test> <!-- test 5 - Advanced - Report BND --> + <param name="input" value="reads_region.bam"/> + <param name="report_BND" value="True"/> + <output name="output" file="expected_outcome5.vcf" lines_diff="2"/> + </test> + <test> <!-- test 6 - Parameter Estimation - skip --> + <param name="input" value="reads_region.bam"/> + <param name="skip_parameter_estimation" value="True"/> + <output name="output" file="expected_outcome6.vcf" lines_diff="2"/> + </test> + </tests> + <help> + <![CDATA[ +######## +Sniffles +######## + +What is Sniffles? +***************** +Sniffles is a SV caller for long reads. It is mainly designed for PacBio reads, but also works on Oxford Nanopore reads. SV are larger events on the genome (e.g. 
deletions, duplications, insertions, inversions and translocations). Sniffles can detect all of these type and more such as nested SVs (e.g. inversion flanked by deletions or an inverted duplication). Furthermore, Sniffles incorporates multiple auto tuning functions to determine data set depending parameter to reduce the overall risk of falsely infer SVs. + +Quick Start +*********** + +Make sure you have a sorted bam file either from ngmlr or from bwa. For the latter make sure you have used -M parameter for mapping to mark which alignments are primary and which are secondary! Note you have to adjust the parameters for low coverage cases. + +Parameters +********** + +General +------- + ++---------------------------+-----------------------------------------------------------------------+ +| Parameter | Description | ++===========================+=======================================================================+ +| Minimum Support | Minimum number of reads that support a SV to be reported. Default: 10 | ++---------------------------+-----------------------------------------------------------------------+ +| Maximum Number of Splits | Maximum number of split segments a read is aligned at before it is | +| | ignored. Default: 7 | ++---------------------------+-----------------------------------------------------------------------+ +| Maximum Distance | Maximum distance to group SV together. Sniffles estimates this | +| | parameter during runtime to group together SVs reported by different | +| | reads. Default: 1kb | ++---------------------------+-----------------------------------------------------------------------+ +| Minimum Length | Minimum length of SV to be reported. Default: 30bp | ++---------------------------+-----------------------------------------------------------------------+ +| Minimum Mapping Quality | Minimum mapping quality of alignment to be taken into account. 
| +| | Default: 20 | ++---------------------------+-----------------------------------------------------------------------+ +| Number of Reads to Report | Number of read names to be reported that support the SV in the vcf | +| | file. Default: 0 | ++---------------------------+-----------------------------------------------------------------------+ +| Minimum Seq Size | Discard read if none of its segment is larger than this. Default: 2kb | ++---------------------------+-----------------------------------------------------------------------+ +| Minimum ZMW | Discard SV that are not supported by at least x zmws. This applies | +| | only for PacBio recognizable reads. Default: 0 | ++---------------------------+-----------------------------------------------------------------------+ +| Enable CS String | Enables the scan of CS string instead of Cigar and MD. Default: False| ++---------------------------+-----------------------------------------------------------------------+ + +| + +Clustering Options +------------------ + ++----------------------------------------+-----------------------------------------------------------------------+ +| Parameter | Description | ++========================================+=======================================================================+ +| Cluster | Performs read based phasing to mark SVs that occur together. | ++----------------------------------------+-----------------------------------------------------------------------+ +| Cluster Support | Minimum number of reads supporting clustering of SV. Default: 1 | ++----------------------------------------+-----------------------------------------------------------------------+ +| Allele Frequency Threshold | Filters the SV calls based on the allele frequency. Default: 0.0 | ++----------------------------------------+-----------------------------------------------------------------------+ +| Minimum Homogenous Allele Frequency | Minimum homogeneous threshold on allele frequency. 
Default: 0.8 | ++----------------------------------------+-----------------------------------------------------------------------+ +| Minimum Heterogeneous Allele Frequency | Minimum heterogeneous threshold on allele frequency. Default: 0.3 | ++----------------------------------------+-----------------------------------------------------------------------+ + +| + +Advanced Options +---------------- + ++----------------------------------------+------------------------------------------------------------------------+ +| Parameter | Description | ++========================================+========================================================================+ +| Report BND | Reports the inversions and translocations as BND events. Default: True | ++----------------------------------------+------------------------------------------------------------------------+ +| Don't Report Seq | Don't report sequences for indels in vcf output. (Beta version!) | +| | Default: False | ++----------------------------------------+------------------------------------------------------------------------+ +| Ignore sd | Ignores the sd based filtering. Default: False | ++----------------------------------------+------------------------------------------------------------------------+ +| CCS Reads | Preset CCS Pacbio setting. (Beta) Default: False | ++----------------------------------------+------------------------------------------------------------------------+ + +| + +Parameter Estimation Options +---------------------------- + ++----------------------------------------+------------------------------------------------------------------------+ +| Parameter | Description | ++========================================+========================================================================+ +| Skip Parameter Estimation | Enables the scan if only very few reads are present. 
Default: False | ++----------------------------------------+------------------------------------------------------------------------+ +| Estimated Deletion Ratio | Estimated ratio of deletions per read. Default: 0.0458369 | ++----------------------------------------+------------------------------------------------------------------------+ +| Estimated Insertion Ratio | Estimated ratio of insertions per read. Default: 0.049379 | ++----------------------------------------+------------------------------------------------------------------------+ +| Maximum Differences Per Window | Maximum differences per 100bp. Default: 50 | ++----------------------------------------+------------------------------------------------------------------------+ +| Maximum Distance Between Alignment | Maximum distance between alignment (indel) events. Default: 4 | +| Events | | ++----------------------------------------+------------------------------------------------------------------------+ + +| + +Output +****** + +VCF Info field description +| +Sniffles reports several pieces of information in the Info field. The entries are delimited by ";": +| ++-------------------+------------------------------------------------------------------------------------------------------+ +| IMPRECISE/PRECISE | Indicates the confidence of the exact breakpoint positions (bp). | ++-------------------+------------------------------------------------------------------------------------------------------+ +| CHR2= | The chromosome of the second breakpoint of the SV reported. | ++-------------------+------------------------------------------------------------------------------------------------------+ +| END= | The position (bp) of the second breakpoint of the SV reported. | ++-------------------+------------------------------------------------------------------------------------------------------+ +| ZMW= | For PacBio based reads, shows the number of ZMW that support the SV. 
| ++-------------------+------------------------------------------------------------------------------------------------------+ +| SVTYPE= | The type of the SV. (see Alt field above) | ++-------------------+------------------------------------------------------------------------------------------------------+ +| SUPTYPE= | Indicates what evidence supports the SVs (SR: Split Reads, AL: Alignment, NR: Noisy Region). | ++-------------------+------------------------------------------------------------------------------------------------------+ +| STD_quant_start= | The standard deviation of the start breakpoints. | ++-------------------+------------------------------------------------------------------------------------------------------+ +| STD_quant_stop= | The standard deviation of the stop breakpoints. | ++-------------------+------------------------------------------------------------------------------------------------------+ +| RNAMES= | A comma separated list of read names that support the SV event. Controlled by -n Parameter. | ++-------------------+------------------------------------------------------------------------------------------------------+ +| SVLEN= | Indicates the length of SVs. | ++-------------------+------------------------------------------------------------------------------------------------------+ +| STRANDS= | Strand information at both breakpoints. | ++-------------------+------------------------------------------------------------------------------------------------------+ +| SEQ= | If reportable shows the sequence of the indels. | ++-------------------+------------------------------------------------------------------------------------------------------+ +| RE= | Number of reads supporting the variance. 
| ++-------------------+------------------------------------------------------------------------------------------------------+ +| AF= | Allele frequency (only if run with --genotype) | ++-------------------+------------------------------------------------------------------------------------------------------+ +| +Source: https://github.com/fritzsedlazeck/Sniffles/wiki + ]]> + </help> + <citations> + <citation type="doi">10.1038/s41592-018-0001-7</citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/expected_outcome3.vcf Mon Sep 14 07:39:07 2020 +0000 @@ -0,0 +1,122 @@ +##fileformat=VCFv4.1 +##source=Sniffles +##fileDate=20200901:51:47 AMef_minus +##contig=<ID=1,length=249250621> +##contig=<ID=2,length=243199373> +##contig=<ID=3,length=198022430> +##contig=<ID=4,length=191154276> +##contig=<ID=5,length=180915260> +##contig=<ID=6,length=171115067> +##contig=<ID=7,length=159138663> +##contig=<ID=8,length=146364022> +##contig=<ID=9,length=141213431> +##contig=<ID=10,length=135534747> +##contig=<ID=11,length=135006516> +##contig=<ID=12,length=133851895> +##contig=<ID=13,length=115169878> +##contig=<ID=14,length=107349540> +##contig=<ID=15,length=102531392> +##contig=<ID=16,length=90354753> +##contig=<ID=17,length=81195210> +##contig=<ID=18,length=78077248> +##contig=<ID=19,length=59128983> +##contig=<ID=20,length=63025520> +##contig=<ID=21,length=48129895> +##contig=<ID=22,length=51304566> +##contig=<ID=X,length=155270560> +##contig=<ID=Y,length=59373566> +##contig=<ID=MT,length=16569> +##contig=<ID=GL000207.1,length=4262> +##contig=<ID=GL000226.1,length=15008> +##contig=<ID=GL000229.1,length=19913> +##contig=<ID=GL000231.1,length=27386> +##contig=<ID=GL000210.1,length=27682> +##contig=<ID=GL000239.1,length=33824> +##contig=<ID=GL000235.1,length=34474> +##contig=<ID=GL000201.1,length=36148> +##contig=<ID=GL000247.1,length=36422> +##contig=<ID=GL000245.1,length=36651> +##contig=<ID=GL000197.1,length=37175> +##contig=<ID=GL000203.1,length=37498> +##contig=<ID=GL000246.1,length=38154> +##contig=<ID=GL000249.1,length=38502> +##contig=<ID=GL000196.1,length=38914> +##contig=<ID=GL000248.1,length=39786> +##contig=<ID=GL000244.1,length=39929> +##contig=<ID=GL000238.1,length=39939> +##contig=<ID=GL000202.1,length=40103> +##contig=<ID=GL000234.1,length=40531> +##contig=<ID=GL000232.1,length=40652> +##contig=<ID=GL000206.1,length=41001> +##contig=<ID=GL000240.1,length=41933> +##contig=<ID=GL000236.1,length=41934> 
+##contig=<ID=GL000241.1,length=42152> +##contig=<ID=GL000243.1,length=43341> +##contig=<ID=GL000242.1,length=43523> +##contig=<ID=GL000230.1,length=43691> +##contig=<ID=GL000237.1,length=45867> +##contig=<ID=GL000233.1,length=45941> +##contig=<ID=GL000204.1,length=81310> +##contig=<ID=GL000198.1,length=90085> +##contig=<ID=GL000208.1,length=92689> +##contig=<ID=GL000191.1,length=106433> +##contig=<ID=GL000227.1,length=128374> +##contig=<ID=GL000228.1,length=129120> +##contig=<ID=GL000214.1,length=137718> +##contig=<ID=GL000221.1,length=155397> +##contig=<ID=GL000209.1,length=159169> +##contig=<ID=GL000218.1,length=161147> +##contig=<ID=GL000220.1,length=161802> +##contig=<ID=GL000213.1,length=164239> +##contig=<ID=GL000211.1,length=166566> +##contig=<ID=GL000199.1,length=169874> +##contig=<ID=GL000217.1,length=172149> +##contig=<ID=GL000216.1,length=172294> +##contig=<ID=GL000215.1,length=172545> +##contig=<ID=GL000205.1,length=174588> +##contig=<ID=GL000219.1,length=179198> +##contig=<ID=GL000224.1,length=179693> +##contig=<ID=GL000223.1,length=180455> +##contig=<ID=GL000195.1,length=182896> +##contig=<ID=GL000212.1,length=186858> +##contig=<ID=GL000222.1,length=186861> +##contig=<ID=GL000200.1,length=187035> +##contig=<ID=GL000193.1,length=189789> +##contig=<ID=GL000194.1,length=191469> +##contig=<ID=GL000225.1,length=211173> +##contig=<ID=GL000192.1,length=547496> +##contig=<ID=NC_007605,length=171823> +##contig=<ID=hs37d5,length=35477943> +##ALT=<ID=DEL,Description="Deletion"> +##ALT=<ID=DUP,Description="Duplication"> +##ALT=<ID=INV,Description="Inversion"> +##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries"> +##ALT=<ID=TRA,Description="Translocation"> +##ALT=<ID=INS,Description="Insertion"> +##FILTER=<ID=UNRESOLVED,Description="An insertion that is longer than the read and thus we cannot predict the full size."> +##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation"> 
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant"> +##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends"> +##INFO=<ID=RE,Number=1,Type=Integer,Description="read support"> +##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> +##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV"> +##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=SEQ,Number=1,Type=String,Description="Extracted sequence from the best representative read."> +##INFO=<ID=STRANDS2,Number=4,Type=Integer,Description="alt reads first + ,alt reads first -,alt reads second + ,alt reads second -."> +##INFO=<ID=REF_strand,Number=.,Type=Integer,Description="plus strand ref, minus strand ref."> +##INFO=<ID=Strandbias_pval,Number=A,Type=Float,Description="P-value for fisher exact test for strand bias."> +##INFO=<ID=STD_quant_start,Number=A,Type=Float,Description="STD of the start breakpoints across the reads."> +##INFO=<ID=STD_quant_stop,Number=A,Type=Float,Description="STD of the stop breakpoints across the reads."> +##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Float,Description="Kurtosis value of the start breakpoints across the reads."> +##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Float,Description="Kurtosis value of the stop breakpoints across the reads."> +##INFO=<ID=SUPTYPE,Number=.,Type=String,Description="Type by which the variant is supported.(SR,AL,NR)"> +##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency."> +##INFO=<ID=ZMW,Number=A,Type=Integer,Description="Number of ZMWs (Pacbio) supporting SV."> 
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads"> +##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT input.bam +21 21492143 0 AAAATATGTTTTAAATTGTTGATGATTTCAAATATTACAGGAATAGAAACTTTAACTTAACACAGAATGATTATCTGGCTTCCTTCTGTAAAATATCTTAAAGGTTAATGTGGATTTGAATTGCACAACATTCCAAATGCTTCTCCCCCTTTAAAAAGAATAGTCTTATCTTTTAAAAAGAATACTCATATCTTTTATTTTTCTTATGCAAGAGCAAAAATAAGGAAAAAATATATTATTCAGGAGAATCATGGCAACAATTTAAGGAAGACAAAACCAGTCTTTAGCAACCAGTATACATATATATCATCTTTTTTTCTGCTTTAGGGTAGGTTGCTTCTATCACCAACCTGTTCCAAATCCTCCTCTTACATGCACCATTAAAACATACTCTTTCAAAAACGAGGTGATAAAATCACAAATATCAATCTATCGTTCAGAAGAAGGTACCTTTATTTTACCTTAAAGGAATTTGATATATAATGGAGAAAAGAAAATTACTTTCT N . PASS PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=21;END=21492649;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=0.572582;Kurtosis_quant_stop=1.417662;SVTYPE=DEL;SUPTYPE=AL,SR;SVLEN=-506;STRANDS=+-;STRANDS2=22,26,22,26;RE=48;REF_strand=19,24;Strandbias_pval=1;AF=0.527473 GT:DR:DV 0/1:43:48
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/expected_outcome4.vcf Mon Sep 14 07:39:07 2020 +0000 @@ -0,0 +1,122 @@ +##fileformat=VCFv4.1 +##source=Sniffles +##fileDate=20200901:51:57 AMef_minus +##contig=<ID=1,length=249250621> +##contig=<ID=2,length=243199373> +##contig=<ID=3,length=198022430> +##contig=<ID=4,length=191154276> +##contig=<ID=5,length=180915260> +##contig=<ID=6,length=171115067> +##contig=<ID=7,length=159138663> +##contig=<ID=8,length=146364022> +##contig=<ID=9,length=141213431> +##contig=<ID=10,length=135534747> +##contig=<ID=11,length=135006516> +##contig=<ID=12,length=133851895> +##contig=<ID=13,length=115169878> +##contig=<ID=14,length=107349540> +##contig=<ID=15,length=102531392> +##contig=<ID=16,length=90354753> +##contig=<ID=17,length=81195210> +##contig=<ID=18,length=78077248> +##contig=<ID=19,length=59128983> +##contig=<ID=20,length=63025520> +##contig=<ID=21,length=48129895> +##contig=<ID=22,length=51304566> +##contig=<ID=X,length=155270560> +##contig=<ID=Y,length=59373566> +##contig=<ID=MT,length=16569> +##contig=<ID=GL000207.1,length=4262> +##contig=<ID=GL000226.1,length=15008> +##contig=<ID=GL000229.1,length=19913> +##contig=<ID=GL000231.1,length=27386> +##contig=<ID=GL000210.1,length=27682> +##contig=<ID=GL000239.1,length=33824> +##contig=<ID=GL000235.1,length=34474> +##contig=<ID=GL000201.1,length=36148> +##contig=<ID=GL000247.1,length=36422> +##contig=<ID=GL000245.1,length=36651> +##contig=<ID=GL000197.1,length=37175> +##contig=<ID=GL000203.1,length=37498> +##contig=<ID=GL000246.1,length=38154> +##contig=<ID=GL000249.1,length=38502> +##contig=<ID=GL000196.1,length=38914> +##contig=<ID=GL000248.1,length=39786> +##contig=<ID=GL000244.1,length=39929> +##contig=<ID=GL000238.1,length=39939> +##contig=<ID=GL000202.1,length=40103> +##contig=<ID=GL000234.1,length=40531> +##contig=<ID=GL000232.1,length=40652> +##contig=<ID=GL000206.1,length=41001> +##contig=<ID=GL000240.1,length=41933> +##contig=<ID=GL000236.1,length=41934> 
+##contig=<ID=GL000241.1,length=42152> +##contig=<ID=GL000243.1,length=43341> +##contig=<ID=GL000242.1,length=43523> +##contig=<ID=GL000230.1,length=43691> +##contig=<ID=GL000237.1,length=45867> +##contig=<ID=GL000233.1,length=45941> +##contig=<ID=GL000204.1,length=81310> +##contig=<ID=GL000198.1,length=90085> +##contig=<ID=GL000208.1,length=92689> +##contig=<ID=GL000191.1,length=106433> +##contig=<ID=GL000227.1,length=128374> +##contig=<ID=GL000228.1,length=129120> +##contig=<ID=GL000214.1,length=137718> +##contig=<ID=GL000221.1,length=155397> +##contig=<ID=GL000209.1,length=159169> +##contig=<ID=GL000218.1,length=161147> +##contig=<ID=GL000220.1,length=161802> +##contig=<ID=GL000213.1,length=164239> +##contig=<ID=GL000211.1,length=166566> +##contig=<ID=GL000199.1,length=169874> +##contig=<ID=GL000217.1,length=172149> +##contig=<ID=GL000216.1,length=172294> +##contig=<ID=GL000215.1,length=172545> +##contig=<ID=GL000205.1,length=174588> +##contig=<ID=GL000219.1,length=179198> +##contig=<ID=GL000224.1,length=179693> +##contig=<ID=GL000223.1,length=180455> +##contig=<ID=GL000195.1,length=182896> +##contig=<ID=GL000212.1,length=186858> +##contig=<ID=GL000222.1,length=186861> +##contig=<ID=GL000200.1,length=187035> +##contig=<ID=GL000193.1,length=189789> +##contig=<ID=GL000194.1,length=191469> +##contig=<ID=GL000225.1,length=211173> +##contig=<ID=GL000192.1,length=547496> +##contig=<ID=NC_007605,length=171823> +##contig=<ID=hs37d5,length=35477943> +##ALT=<ID=DEL,Description="Deletion"> +##ALT=<ID=DUP,Description="Duplication"> +##ALT=<ID=INV,Description="Inversion"> +##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries"> +##ALT=<ID=TRA,Description="Translocation"> +##ALT=<ID=INS,Description="Insertion"> +##FILTER=<ID=UNRESOLVED,Description="An insertion that is longer than the read and thus we cannot predict the full size."> +##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation"> 
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant"> +##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends"> +##INFO=<ID=RE,Number=1,Type=Integer,Description="read support"> +##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> +##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV"> +##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=SEQ,Number=1,Type=String,Description="Extracted sequence from the best representative read."> +##INFO=<ID=STRANDS2,Number=4,Type=Integer,Description="alt reads first + ,alt reads first -,alt reads second + ,alt reads second -."> +##INFO=<ID=REF_strand,Number=.,Type=Integer,Description="plus strand ref, minus strand ref."> +##INFO=<ID=Strandbias_pval,Number=A,Type=Float,Description="P-value for fisher exact test for strand bias."> +##INFO=<ID=STD_quant_start,Number=A,Type=Float,Description="STD of the start breakpoints across the reads."> +##INFO=<ID=STD_quant_stop,Number=A,Type=Float,Description="STD of the stop breakpoints across the reads."> +##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Float,Description="Kurtosis value of the start breakpoints across the reads."> +##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Float,Description="Kurtosis value of the stop breakpoints across the reads."> +##INFO=<ID=SUPTYPE,Number=.,Type=String,Description="Type by which the variant is supported.(SR,AL,NR)"> +##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency."> +##INFO=<ID=ZMW,Number=A,Type=Integer,Description="Number of ZMWs (Pacbio) supporting SV."> 
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads"> +##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT input.bam +21 21492143 0 AAAATATGTTTTAAATTGTTGATGATTTCAAATATTACAGGAATAGAAACTTTAACTTAACACAGAATGATTATCTGGCTTCCTTCTGTAAAATATCTTAAAGGTTAATGTGGATTTGAATTGCACAACATTCCAAATGCTTCTCCCCCTTTAAAAAGAATAGTCTTATCTTTTAAAAAGAATACTCATATCTTTTATTTTTCTTATGCAAGAGCAAAAATAAGGAAAAAATATATTATTCAGGAGAATCATGGCAACAATTTAAGGAAGACAAAACCAGTCTTTAGCAACCAGTATACATATATATCATCTTTTTTTCTGCTTTAGGGTAGGTTGCTTCTATCACCAACCTGTTCCAAATCCTCCTCTTACATGCACCATTAAAACATACTCTTTCAAAAACGAGGTGATAAAATCACAAATATCAATCTATCGTTCAGAAGAAGGTACCTTTATTTTACCTTAAAGGAATTTGATATATAATGGAGAAAAGAAAATTACTTTCT N . PASS PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=21;END=21492649;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=0.572582;Kurtosis_quant_stop=1.417662;SVTYPE=DEL;SUPTYPE=AL,SR;SVLEN=-506;STRANDS=+-;STRANDS2=22,26,22,26;RE=48;REF_strand=19,24;Strandbias_pval=1;AF=0.527473 GT:DR:DV 0/1:43:48
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/expected_outcome5.vcf Mon Sep 14 07:39:07 2020 +0000 @@ -0,0 +1,122 @@ +##fileformat=VCFv4.1 +##source=Sniffles +##fileDate=20200901:52:08 AMef_minus +##contig=<ID=1,length=249250621> +##contig=<ID=2,length=243199373> +##contig=<ID=3,length=198022430> +##contig=<ID=4,length=191154276> +##contig=<ID=5,length=180915260> +##contig=<ID=6,length=171115067> +##contig=<ID=7,length=159138663> +##contig=<ID=8,length=146364022> +##contig=<ID=9,length=141213431> +##contig=<ID=10,length=135534747> +##contig=<ID=11,length=135006516> +##contig=<ID=12,length=133851895> +##contig=<ID=13,length=115169878> +##contig=<ID=14,length=107349540> +##contig=<ID=15,length=102531392> +##contig=<ID=16,length=90354753> +##contig=<ID=17,length=81195210> +##contig=<ID=18,length=78077248> +##contig=<ID=19,length=59128983> +##contig=<ID=20,length=63025520> +##contig=<ID=21,length=48129895> +##contig=<ID=22,length=51304566> +##contig=<ID=X,length=155270560> +##contig=<ID=Y,length=59373566> +##contig=<ID=MT,length=16569> +##contig=<ID=GL000207.1,length=4262> +##contig=<ID=GL000226.1,length=15008> +##contig=<ID=GL000229.1,length=19913> +##contig=<ID=GL000231.1,length=27386> +##contig=<ID=GL000210.1,length=27682> +##contig=<ID=GL000239.1,length=33824> +##contig=<ID=GL000235.1,length=34474> +##contig=<ID=GL000201.1,length=36148> +##contig=<ID=GL000247.1,length=36422> +##contig=<ID=GL000245.1,length=36651> +##contig=<ID=GL000197.1,length=37175> +##contig=<ID=GL000203.1,length=37498> +##contig=<ID=GL000246.1,length=38154> +##contig=<ID=GL000249.1,length=38502> +##contig=<ID=GL000196.1,length=38914> +##contig=<ID=GL000248.1,length=39786> +##contig=<ID=GL000244.1,length=39929> +##contig=<ID=GL000238.1,length=39939> +##contig=<ID=GL000202.1,length=40103> +##contig=<ID=GL000234.1,length=40531> +##contig=<ID=GL000232.1,length=40652> +##contig=<ID=GL000206.1,length=41001> +##contig=<ID=GL000240.1,length=41933> +##contig=<ID=GL000236.1,length=41934> 
+##contig=<ID=GL000241.1,length=42152> +##contig=<ID=GL000243.1,length=43341> +##contig=<ID=GL000242.1,length=43523> +##contig=<ID=GL000230.1,length=43691> +##contig=<ID=GL000237.1,length=45867> +##contig=<ID=GL000233.1,length=45941> +##contig=<ID=GL000204.1,length=81310> +##contig=<ID=GL000198.1,length=90085> +##contig=<ID=GL000208.1,length=92689> +##contig=<ID=GL000191.1,length=106433> +##contig=<ID=GL000227.1,length=128374> +##contig=<ID=GL000228.1,length=129120> +##contig=<ID=GL000214.1,length=137718> +##contig=<ID=GL000221.1,length=155397> +##contig=<ID=GL000209.1,length=159169> +##contig=<ID=GL000218.1,length=161147> +##contig=<ID=GL000220.1,length=161802> +##contig=<ID=GL000213.1,length=164239> +##contig=<ID=GL000211.1,length=166566> +##contig=<ID=GL000199.1,length=169874> +##contig=<ID=GL000217.1,length=172149> +##contig=<ID=GL000216.1,length=172294> +##contig=<ID=GL000215.1,length=172545> +##contig=<ID=GL000205.1,length=174588> +##contig=<ID=GL000219.1,length=179198> +##contig=<ID=GL000224.1,length=179693> +##contig=<ID=GL000223.1,length=180455> +##contig=<ID=GL000195.1,length=182896> +##contig=<ID=GL000212.1,length=186858> +##contig=<ID=GL000222.1,length=186861> +##contig=<ID=GL000200.1,length=187035> +##contig=<ID=GL000193.1,length=189789> +##contig=<ID=GL000194.1,length=191469> +##contig=<ID=GL000225.1,length=211173> +##contig=<ID=GL000192.1,length=547496> +##contig=<ID=NC_007605,length=171823> +##contig=<ID=hs37d5,length=35477943> +##ALT=<ID=DEL,Description="Deletion"> +##ALT=<ID=DUP,Description="Duplication"> +##ALT=<ID=INV,Description="Inversion"> +##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries"> +##ALT=<ID=TRA,Description="Translocation"> +##ALT=<ID=INS,Description="Insertion"> +##FILTER=<ID=UNRESOLVED,Description="An insertion that is longer than the read and thus we cannot predict the full size."> +##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation"> 
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant"> +##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends"> +##INFO=<ID=RE,Number=1,Type=Integer,Description="read support"> +##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> +##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV"> +##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=SEQ,Number=1,Type=String,Description="Extracted sequence from the best representative read."> +##INFO=<ID=STRANDS2,Number=4,Type=Integer,Description="alt reads first + ,alt reads first -,alt reads second + ,alt reads second -."> +##INFO=<ID=REF_strand,Number=.,Type=Integer,Description="plus strand ref, minus strand ref."> +##INFO=<ID=Strandbias_pval,Number=A,Type=Float,Description="P-value for fisher exact test for strand bias."> +##INFO=<ID=STD_quant_start,Number=A,Type=Float,Description="STD of the start breakpoints across the reads."> +##INFO=<ID=STD_quant_stop,Number=A,Type=Float,Description="STD of the stop breakpoints across the reads."> +##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Float,Description="Kurtosis value of the start breakpoints across the reads."> +##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Float,Description="Kurtosis value of the stop breakpoints across the reads."> +##INFO=<ID=SUPTYPE,Number=.,Type=String,Description="Type by which the variant is supported.(SR,AL,NR)"> +##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency."> +##INFO=<ID=ZMW,Number=A,Type=Integer,Description="Number of ZMWs (Pacbio) supporting SV."> 
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads"> +##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT input.bam +21 21492143 0 AAAATATGTTTTAAATTGTTGATGATTTCAAATATTACAGGAATAGAAACTTTAACTTAACACAGAATGATTATCTGGCTTCCTTCTGTAAAATATCTTAAAGGTTAATGTGGATTTGAATTGCACAACATTCCAAATGCTTCTCCCCCTTTAAAAAGAATAGTCTTATCTTTTAAAAAGAATACTCATATCTTTTATTTTTCTTATGCAAGAGCAAAAATAAGGAAAAAATATATTATTCAGGAGAATCATGGCAACAATTTAAGGAAGACAAAACCAGTCTTTAGCAACCAGTATACATATATATCATCTTTTTTTCTGCTTTAGGGTAGGTTGCTTCTATCACCAACCTGTTCCAAATCCTCCTCTTACATGCACCATTAAAACATACTCTTTCAAAAACGAGGTGATAAAATCACAAATATCAATCTATCGTTCAGAAGAAGGTACCTTTATTTTACCTTAAAGGAATTTGATATATAATGGAGAAAAGAAAATTACTTTCT N . PASS PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=21;END=21492649;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=0.572582;Kurtosis_quant_stop=1.417662;SVTYPE=DEL;SUPTYPE=AL,SR;SVLEN=-506;STRANDS=+-;STRANDS2=22,26,22,26;RE=48;REF_strand=19,24;Strandbias_pval=1;AF=0.527473 GT:DR:DV 0/1:43:48
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/expected_outcome6.vcf Mon Sep 14 07:39:07 2020 +0000 @@ -0,0 +1,122 @@ +##fileformat=VCFv4.1 +##source=Sniffles +##fileDate=20200901:52:19 AMef_minus +##contig=<ID=1,length=249250621> +##contig=<ID=2,length=243199373> +##contig=<ID=3,length=198022430> +##contig=<ID=4,length=191154276> +##contig=<ID=5,length=180915260> +##contig=<ID=6,length=171115067> +##contig=<ID=7,length=159138663> +##contig=<ID=8,length=146364022> +##contig=<ID=9,length=141213431> +##contig=<ID=10,length=135534747> +##contig=<ID=11,length=135006516> +##contig=<ID=12,length=133851895> +##contig=<ID=13,length=115169878> +##contig=<ID=14,length=107349540> +##contig=<ID=15,length=102531392> +##contig=<ID=16,length=90354753> +##contig=<ID=17,length=81195210> +##contig=<ID=18,length=78077248> +##contig=<ID=19,length=59128983> +##contig=<ID=20,length=63025520> +##contig=<ID=21,length=48129895> +##contig=<ID=22,length=51304566> +##contig=<ID=X,length=155270560> +##contig=<ID=Y,length=59373566> +##contig=<ID=MT,length=16569> +##contig=<ID=GL000207.1,length=4262> +##contig=<ID=GL000226.1,length=15008> +##contig=<ID=GL000229.1,length=19913> +##contig=<ID=GL000231.1,length=27386> +##contig=<ID=GL000210.1,length=27682> +##contig=<ID=GL000239.1,length=33824> +##contig=<ID=GL000235.1,length=34474> +##contig=<ID=GL000201.1,length=36148> +##contig=<ID=GL000247.1,length=36422> +##contig=<ID=GL000245.1,length=36651> +##contig=<ID=GL000197.1,length=37175> +##contig=<ID=GL000203.1,length=37498> +##contig=<ID=GL000246.1,length=38154> +##contig=<ID=GL000249.1,length=38502> +##contig=<ID=GL000196.1,length=38914> +##contig=<ID=GL000248.1,length=39786> +##contig=<ID=GL000244.1,length=39929> +##contig=<ID=GL000238.1,length=39939> +##contig=<ID=GL000202.1,length=40103> +##contig=<ID=GL000234.1,length=40531> +##contig=<ID=GL000232.1,length=40652> +##contig=<ID=GL000206.1,length=41001> +##contig=<ID=GL000240.1,length=41933> +##contig=<ID=GL000236.1,length=41934> 
+##contig=<ID=GL000241.1,length=42152> +##contig=<ID=GL000243.1,length=43341> +##contig=<ID=GL000242.1,length=43523> +##contig=<ID=GL000230.1,length=43691> +##contig=<ID=GL000237.1,length=45867> +##contig=<ID=GL000233.1,length=45941> +##contig=<ID=GL000204.1,length=81310> +##contig=<ID=GL000198.1,length=90085> +##contig=<ID=GL000208.1,length=92689> +##contig=<ID=GL000191.1,length=106433> +##contig=<ID=GL000227.1,length=128374> +##contig=<ID=GL000228.1,length=129120> +##contig=<ID=GL000214.1,length=137718> +##contig=<ID=GL000221.1,length=155397> +##contig=<ID=GL000209.1,length=159169> +##contig=<ID=GL000218.1,length=161147> +##contig=<ID=GL000220.1,length=161802> +##contig=<ID=GL000213.1,length=164239> +##contig=<ID=GL000211.1,length=166566> +##contig=<ID=GL000199.1,length=169874> +##contig=<ID=GL000217.1,length=172149> +##contig=<ID=GL000216.1,length=172294> +##contig=<ID=GL000215.1,length=172545> +##contig=<ID=GL000205.1,length=174588> +##contig=<ID=GL000219.1,length=179198> +##contig=<ID=GL000224.1,length=179693> +##contig=<ID=GL000223.1,length=180455> +##contig=<ID=GL000195.1,length=182896> +##contig=<ID=GL000212.1,length=186858> +##contig=<ID=GL000222.1,length=186861> +##contig=<ID=GL000200.1,length=187035> +##contig=<ID=GL000193.1,length=189789> +##contig=<ID=GL000194.1,length=191469> +##contig=<ID=GL000225.1,length=211173> +##contig=<ID=GL000192.1,length=547496> +##contig=<ID=NC_007605,length=171823> +##contig=<ID=hs37d5,length=35477943> +##ALT=<ID=DEL,Description="Deletion"> +##ALT=<ID=DUP,Description="Duplication"> +##ALT=<ID=INV,Description="Inversion"> +##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries"> +##ALT=<ID=TRA,Description="Translocation"> +##ALT=<ID=INS,Description="Insertion"> +##FILTER=<ID=UNRESOLVED,Description="An insertion that is longer than the read and thus we cannot predict the full size."> +##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation"> 
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant"> +##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends"> +##INFO=<ID=RE,Number=1,Type=Integer,Description="read support"> +##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> +##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV"> +##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=SEQ,Number=1,Type=String,Description="Extracted sequence from the best representative read."> +##INFO=<ID=STRANDS2,Number=4,Type=Integer,Description="alt reads first + ,alt reads first -,alt reads second + ,alt reads second -."> +##INFO=<ID=REF_strand,Number=.,Type=Integer,Description="plus strand ref, minus strand ref."> +##INFO=<ID=Strandbias_pval,Number=A,Type=Float,Description="P-value for fisher exact test for strand bias."> +##INFO=<ID=STD_quant_start,Number=A,Type=Float,Description="STD of the start breakpoints across the reads."> +##INFO=<ID=STD_quant_stop,Number=A,Type=Float,Description="STD of the stop breakpoints across the reads."> +##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Float,Description="Kurtosis value of the start breakpoints across the reads."> +##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Float,Description="Kurtosis value of the stop breakpoints across the reads."> +##INFO=<ID=SUPTYPE,Number=.,Type=String,Description="Type by which the variant is supported.(SR,AL,NR)"> +##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency."> +##INFO=<ID=ZMW,Number=A,Type=Integer,Description="Number of ZMWs (Pacbio) supporting SV."> 
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads"> +##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT input.bam +21 21492143 0 AAAATATGTTTTAAATTGTTGATGATTTCAAATATTACAGGAATAGAAACTTTAACTTAACACAGAATGATTATCTGGCTTCCTTCTGTAAAATATCTTAAAGGTTAATGTGGATTTGAATTGCACAACATTCCAAATGCTTCTCCCCCTTTAAAAAGAATAGTCTTATCTTTTAAAAAGAATACTCATATCTTTTATTTTTCTTATGCAAGAGCAAAAATAAGGAAAAAATATATTATTCAGGAGAATCATGGCAACAATTTAAGGAAGACAAAACCAGTCTTTAGCAACCAGTATACATATATATCATCTTTTTTTCTGCTTTAGGGTAGGTTGCTTCTATCACCAACCTGTTCCAAATCCTCCTCTTACATGCACCATTAAAACATACTCTTTCAAAAACGAGGTGATAAAATCACAAATATCAATCTATCGTTCAGAAGAAGGTACCTTTATTTTACCTTAAAGGAATTTGATATATAATGGAGAAAAGAAAATTACTTTCT N . PASS PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=21;END=21492649;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=0.572582;Kurtosis_quant_stop=1.417662;SVTYPE=DEL;SUPTYPE=AL,SR;SVLEN=-506;STRANDS=+-;STRANDS2=22,26,22,26;RE=48;REF_strand=19,24;Strandbias_pval=1;AF=0.527473 GT:DR:DV 0/1:43:48
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/expected_output.vcf Mon Sep 14 07:39:07 2020 +0000 @@ -0,0 +1,122 @@ +##fileformat=VCFv4.1 +##source=Sniffles +##fileDate=20200901:51:22 AMef_minus +##contig=<ID=1,length=249250621> +##contig=<ID=2,length=243199373> +##contig=<ID=3,length=198022430> +##contig=<ID=4,length=191154276> +##contig=<ID=5,length=180915260> +##contig=<ID=6,length=171115067> +##contig=<ID=7,length=159138663> +##contig=<ID=8,length=146364022> +##contig=<ID=9,length=141213431> +##contig=<ID=10,length=135534747> +##contig=<ID=11,length=135006516> +##contig=<ID=12,length=133851895> +##contig=<ID=13,length=115169878> +##contig=<ID=14,length=107349540> +##contig=<ID=15,length=102531392> +##contig=<ID=16,length=90354753> +##contig=<ID=17,length=81195210> +##contig=<ID=18,length=78077248> +##contig=<ID=19,length=59128983> +##contig=<ID=20,length=63025520> +##contig=<ID=21,length=48129895> +##contig=<ID=22,length=51304566> +##contig=<ID=X,length=155270560> +##contig=<ID=Y,length=59373566> +##contig=<ID=MT,length=16569> +##contig=<ID=GL000207.1,length=4262> +##contig=<ID=GL000226.1,length=15008> +##contig=<ID=GL000229.1,length=19913> +##contig=<ID=GL000231.1,length=27386> +##contig=<ID=GL000210.1,length=27682> +##contig=<ID=GL000239.1,length=33824> +##contig=<ID=GL000235.1,length=34474> +##contig=<ID=GL000201.1,length=36148> +##contig=<ID=GL000247.1,length=36422> +##contig=<ID=GL000245.1,length=36651> +##contig=<ID=GL000197.1,length=37175> +##contig=<ID=GL000203.1,length=37498> +##contig=<ID=GL000246.1,length=38154> +##contig=<ID=GL000249.1,length=38502> +##contig=<ID=GL000196.1,length=38914> +##contig=<ID=GL000248.1,length=39786> +##contig=<ID=GL000244.1,length=39929> +##contig=<ID=GL000238.1,length=39939> +##contig=<ID=GL000202.1,length=40103> +##contig=<ID=GL000234.1,length=40531> +##contig=<ID=GL000232.1,length=40652> +##contig=<ID=GL000206.1,length=41001> +##contig=<ID=GL000240.1,length=41933> +##contig=<ID=GL000236.1,length=41934> 
+##contig=<ID=GL000241.1,length=42152> +##contig=<ID=GL000243.1,length=43341> +##contig=<ID=GL000242.1,length=43523> +##contig=<ID=GL000230.1,length=43691> +##contig=<ID=GL000237.1,length=45867> +##contig=<ID=GL000233.1,length=45941> +##contig=<ID=GL000204.1,length=81310> +##contig=<ID=GL000198.1,length=90085> +##contig=<ID=GL000208.1,length=92689> +##contig=<ID=GL000191.1,length=106433> +##contig=<ID=GL000227.1,length=128374> +##contig=<ID=GL000228.1,length=129120> +##contig=<ID=GL000214.1,length=137718> +##contig=<ID=GL000221.1,length=155397> +##contig=<ID=GL000209.1,length=159169> +##contig=<ID=GL000218.1,length=161147> +##contig=<ID=GL000220.1,length=161802> +##contig=<ID=GL000213.1,length=164239> +##contig=<ID=GL000211.1,length=166566> +##contig=<ID=GL000199.1,length=169874> +##contig=<ID=GL000217.1,length=172149> +##contig=<ID=GL000216.1,length=172294> +##contig=<ID=GL000215.1,length=172545> +##contig=<ID=GL000205.1,length=174588> +##contig=<ID=GL000219.1,length=179198> +##contig=<ID=GL000224.1,length=179693> +##contig=<ID=GL000223.1,length=180455> +##contig=<ID=GL000195.1,length=182896> +##contig=<ID=GL000212.1,length=186858> +##contig=<ID=GL000222.1,length=186861> +##contig=<ID=GL000200.1,length=187035> +##contig=<ID=GL000193.1,length=189789> +##contig=<ID=GL000194.1,length=191469> +##contig=<ID=GL000225.1,length=211173> +##contig=<ID=GL000192.1,length=547496> +##contig=<ID=NC_007605,length=171823> +##contig=<ID=hs37d5,length=35477943> +##ALT=<ID=DEL,Description="Deletion"> +##ALT=<ID=DUP,Description="Duplication"> +##ALT=<ID=INV,Description="Inversion"> +##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries"> +##ALT=<ID=TRA,Description="Translocation"> +##ALT=<ID=INS,Description="Insertion"> +##FILTER=<ID=UNRESOLVED,Description="An insertion that is longer than the read and thus we cannot predict the full size."> +##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation"> 
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant"> +##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends"> +##INFO=<ID=RE,Number=1,Type=Integer,Description="read support"> +##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> +##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV"> +##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=SEQ,Number=1,Type=String,Description="Extracted sequence from the best representative read."> +##INFO=<ID=STRANDS2,Number=4,Type=Integer,Description="alt reads first + ,alt reads first -,alt reads second + ,alt reads second -."> +##INFO=<ID=REF_strand,Number=.,Type=Integer,Description="plus strand ref, minus strand ref."> +##INFO=<ID=Strandbias_pval,Number=A,Type=Float,Description="P-value for fisher exact test for strand bias."> +##INFO=<ID=STD_quant_start,Number=A,Type=Float,Description="STD of the start breakpoints across the reads."> +##INFO=<ID=STD_quant_stop,Number=A,Type=Float,Description="STD of the stop breakpoints across the reads."> +##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Float,Description="Kurtosis value of the start breakpoints across the reads."> +##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Float,Description="Kurtosis value of the stop breakpoints across the reads."> +##INFO=<ID=SUPTYPE,Number=.,Type=String,Description="Type by which the variant is supported.(SR,AL,NR)"> +##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency."> +##INFO=<ID=ZMW,Number=A,Type=Integer,Description="Number of ZMWs (Pacbio) supporting SV."> 
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads"> +##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT input.bam +21 21492143 0 AAAATATGTTTTAAATTGTTGATGATTTCAAATATTACAGGAATAGAAACTTTAACTTAACACAGAATGATTATCTGGCTTCCTTCTGTAAAATATCTTAAAGGTTAATGTGGATTTGAATTGCACAACATTCCAAATGCTTCTCCCCCTTTAAAAAGAATAGTCTTATCTTTTAAAAAGAATACTCATATCTTTTATTTTTCTTATGCAAGAGCAAAAATAAGGAAAAAATATATTATTCAGGAGAATCATGGCAACAATTTAAGGAAGACAAAACCAGTCTTTAGCAACCAGTATACATATATATCATCTTTTTTTCTGCTTTAGGGTAGGTTGCTTCTATCACCAACCTGTTCCAAATCCTCCTCTTACATGCACCATTAAAACATACTCTTTCAAAAACGAGGTGATAAAATCACAAATATCAATCTATCGTTCAGAAGAAGGTACCTTTATTTTACCTTAAAGGAATTTGATATATAATGGAGAAAAGAAAATTACTTTCT N . PASS PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=21;END=21492649;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=0.572582;Kurtosis_quant_stop=1.417662;SVTYPE=DEL;SUPTYPE=AL,SR;SVLEN=-506;STRANDS=+-;STRANDS2=22,26,22,26;RE=48;REF_strand=19,24;Strandbias_pval=1;AF=0.527473 GT:DR:DV 0/1:43:48
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/expected_output2.vcf Mon Sep 14 07:39:07 2020 +0000 @@ -0,0 +1,123 @@ +##fileformat=VCFv4.1 +##source=Sniffles +##fileDate=20200901:51:36 AMef_minus +##contig=<ID=1,length=249250621> +##contig=<ID=2,length=243199373> +##contig=<ID=3,length=198022430> +##contig=<ID=4,length=191154276> +##contig=<ID=5,length=180915260> +##contig=<ID=6,length=171115067> +##contig=<ID=7,length=159138663> +##contig=<ID=8,length=146364022> +##contig=<ID=9,length=141213431> +##contig=<ID=10,length=135534747> +##contig=<ID=11,length=135006516> +##contig=<ID=12,length=133851895> +##contig=<ID=13,length=115169878> +##contig=<ID=14,length=107349540> +##contig=<ID=15,length=102531392> +##contig=<ID=16,length=90354753> +##contig=<ID=17,length=81195210> +##contig=<ID=18,length=78077248> +##contig=<ID=19,length=59128983> +##contig=<ID=20,length=63025520> +##contig=<ID=21,length=48129895> +##contig=<ID=22,length=51304566> +##contig=<ID=X,length=155270560> +##contig=<ID=Y,length=59373566> +##contig=<ID=MT,length=16569> +##contig=<ID=GL000207.1,length=4262> +##contig=<ID=GL000226.1,length=15008> +##contig=<ID=GL000229.1,length=19913> +##contig=<ID=GL000231.1,length=27386> +##contig=<ID=GL000210.1,length=27682> +##contig=<ID=GL000239.1,length=33824> +##contig=<ID=GL000235.1,length=34474> +##contig=<ID=GL000201.1,length=36148> +##contig=<ID=GL000247.1,length=36422> +##contig=<ID=GL000245.1,length=36651> +##contig=<ID=GL000197.1,length=37175> +##contig=<ID=GL000203.1,length=37498> +##contig=<ID=GL000246.1,length=38154> +##contig=<ID=GL000249.1,length=38502> +##contig=<ID=GL000196.1,length=38914> +##contig=<ID=GL000248.1,length=39786> +##contig=<ID=GL000244.1,length=39929> +##contig=<ID=GL000238.1,length=39939> +##contig=<ID=GL000202.1,length=40103> +##contig=<ID=GL000234.1,length=40531> +##contig=<ID=GL000232.1,length=40652> +##contig=<ID=GL000206.1,length=41001> +##contig=<ID=GL000240.1,length=41933> +##contig=<ID=GL000236.1,length=41934> 
+##contig=<ID=GL000241.1,length=42152> +##contig=<ID=GL000243.1,length=43341> +##contig=<ID=GL000242.1,length=43523> +##contig=<ID=GL000230.1,length=43691> +##contig=<ID=GL000237.1,length=45867> +##contig=<ID=GL000233.1,length=45941> +##contig=<ID=GL000204.1,length=81310> +##contig=<ID=GL000198.1,length=90085> +##contig=<ID=GL000208.1,length=92689> +##contig=<ID=GL000191.1,length=106433> +##contig=<ID=GL000227.1,length=128374> +##contig=<ID=GL000228.1,length=129120> +##contig=<ID=GL000214.1,length=137718> +##contig=<ID=GL000221.1,length=155397> +##contig=<ID=GL000209.1,length=159169> +##contig=<ID=GL000218.1,length=161147> +##contig=<ID=GL000220.1,length=161802> +##contig=<ID=GL000213.1,length=164239> +##contig=<ID=GL000211.1,length=166566> +##contig=<ID=GL000199.1,length=169874> +##contig=<ID=GL000217.1,length=172149> +##contig=<ID=GL000216.1,length=172294> +##contig=<ID=GL000215.1,length=172545> +##contig=<ID=GL000205.1,length=174588> +##contig=<ID=GL000219.1,length=179198> +##contig=<ID=GL000224.1,length=179693> +##contig=<ID=GL000223.1,length=180455> +##contig=<ID=GL000195.1,length=182896> +##contig=<ID=GL000212.1,length=186858> +##contig=<ID=GL000222.1,length=186861> +##contig=<ID=GL000200.1,length=187035> +##contig=<ID=GL000193.1,length=189789> +##contig=<ID=GL000194.1,length=191469> +##contig=<ID=GL000225.1,length=211173> +##contig=<ID=GL000192.1,length=547496> +##contig=<ID=NC_007605,length=171823> +##contig=<ID=hs37d5,length=35477943> +##ALT=<ID=DEL,Description="Deletion"> +##ALT=<ID=DUP,Description="Duplication"> +##ALT=<ID=INV,Description="Inversion"> +##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries"> +##ALT=<ID=TRA,Description="Translocation"> +##ALT=<ID=INS,Description="Insertion"> +##FILTER=<ID=UNRESOLVED,Description="An insertion that is longer than the read and thus we cannot predict the full size."> +##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation"> 
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant"> +##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends"> +##INFO=<ID=RE,Number=1,Type=Integer,Description="read support"> +##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> +##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV"> +##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=RNAMES,Number=.,Type=String,Description="Names of reads supporting SVs (comma separated)"> +##INFO=<ID=SEQ,Number=1,Type=String,Description="Extracted sequence from the best representative read."> +##INFO=<ID=STRANDS2,Number=4,Type=Integer,Description="alt reads first + ,alt reads first -,alt reads second + ,alt reads second -."> +##INFO=<ID=REF_strand,Number=.,Type=Integer,Description="plus strand ref, minus strand ref."> +##INFO=<ID=Strandbias_pval,Number=A,Type=Float,Description="P-value for fisher exact test for strand bias."> +##INFO=<ID=STD_quant_start,Number=A,Type=Float,Description="STD of the start breakpoints across the reads."> +##INFO=<ID=STD_quant_stop,Number=A,Type=Float,Description="STD of the stop breakpoints across the reads."> +##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Float,Description="Kurtosis value of the start breakpoints across the reads."> +##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Float,Description="Kurtosis value of the stop breakpoints across the reads."> +##INFO=<ID=SUPTYPE,Number=.,Type=String,Description="Type by which the variant is supported.(SR,AL,NR)"> +##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency."> 
+##INFO=<ID=ZMW,Number=A,Type=Integer,Description="Number of ZMWs (Pacbio) supporting SV."> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads"> +##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT input.bam +21 21492143 0 AAAATATGTTTTAAATTGTTGATGATTTCAAATATTACAGGAATAGAAACTTTAACTTAACACAGAATGATTATCTGGCTTCCTTCTGTAAAATATCTTAAAGGTTAATGTGGATTTGAATTGCACAACATTCCAAATGCTTCTCCCCCTTTAAAAAGAATAGTCTTATCTTTTAAAAAGAATACTCATATCTTTTATTTTTCTTATGCAAGAGCAAAAATAAGGAAAAAATATATTATTCAGGAGAATCATGGCAACAATTTAAGGAAGACAAAACCAGTCTTTAGCAACCAGTATACATATATATCATCTTTTTTTCTGCTTTAGGGTAGGTTGCTTCTATCACCAACCTGTTCCAAATCCTCCTCTTACATGCACCATTAAAACATACTCTTTCAAAAACGAGGTGATAAAATCACAAATATCAATCTATCGTTCAGAAGAAGGTACCTTTATTTTACCTTAAAGGAATTTGATATATAATGGAGAAAAGAAAATTACTTTCT N . PASS PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=21;END=21492649;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=0.572582;Kurtosis_quant_stop=1.417662;SVTYPE=DEL;RNAMES=21_21470395_-,21_21478921_-,21_21480008_-,21_21480363_-,21_21481423_-,21_21481887_+,21_21482167_+,21_21482571_-,21_21483235_+,21_21484045_+,21_21484064_-,21_21484222_+,21_21484629_-,21_21485143_+,21_21485316_+,21_21486018_+,21_21486410_+,21_21486549_+,21_21486783_-,21_21487049_+,21_21487332_-,21_21487543_-,21_21487743_+,21_21487803_-,21_21487987_-,21_21488230_-,21_21488258_-,21_21488405_+,21_21488511_-,21_21488850_-,21_21489305_+,21_21489335_-,21_21489385_-,21_21489559_-,21_21489628_+,21_21490045_+,21_21490154_+,21_21490154_-,21_21490705_+,21_21491059_-,21_21491140_-,21_21491170_+,21_21491195_-,21_21491512_-,21_21491645_-,21_21491660_+,21_21491808_+,21_21491999_+;SUPTYPE=AL,SR;SVLEN=-506;STRANDS=+-;STRANDS2=22,26,22,26;RE=48;REF_strand=19,24;Strandbias_pval=1;AF=0.527473 GT:DR:DV 0/1:43:48