Mercurial > repos > iuc > jasminesv
changeset 0:630e2929a131 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/jasminesv/ commit eb5baa10589b31c422ec8b8980617a3f375608ad"
author | iuc |
---|---|
date | Wed, 20 Jan 2021 19:49:40 +0000 |
parents | |
children | 2b62154e39c8 |
files | jasminesv.xml macros.xml test-data/a.vcf test-data/all_fasta.loc test-data/b.vcf test-data/c.vcf test-data/chr_norm_file.txt test-data/d.vcf test-data/genome.fa test-data/out1.vcf tool_data/all_fasta.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 13 files changed, 423 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jasminesv.xml Wed Jan 20 19:49:40 2021 +0000
@@ -0,0 +1,197 @@
+<?xml version="1.0"?>
+<tool id="jasminesv" name="JasmineSV" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Merge structural variants across samples</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <!-- Requirements, version command, reference selector and citations are expanded from the imported macros -->
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+
+    <command detect_errors="exit_code"><![CDATA[
+        ## When duplications are converted to insertions, link the chosen reference genome to reference.fa first
+        #if $dup_to_ins.dup_to_ins:
+            @REF_FASTA@
+        #end if
+        jasmine
+        ## Optional params, passed as quoted key=value arguments
+        'max_dist=${max_dist}'
+        #if float($max_dist_linear) != 0.0:
+            'max_dist_linear=${max_dist_linear}'
+        #end if
+        'min_dist=${min_dist}'
+        'kd_tree_norm=${kd_tree_norm}'
+        'min_seq_id=${min_seq_id}'
+        'k_jaccard=${k_jaccard}'
+        'max_dup_length=${max_dup_length}'
+        'min_support=${min_support}'
+        threads=\${GALAXY_SLOTS:-4}
+        'spec_reads=${spec_reads}'
+        'spec_len=${spec_len}'
+        #if $dup_to_ins.dup_to_ins:
+            'genome_file=reference.fa'
+        #end if
+
+        ## Flags: each expands to its truevalue, or to an empty quoted argument when unset
+        '${ignore_strand}'
+        '${ignore_type}'
+        #if $dup_to_ins.dup_to_ins:
+            '${dup_to_ins.dup_to_ins}'
+        #end if
+        '${mark_specific}'
+        '${pre_normalize}'
+        '${use_edit_dist}'
+        '${preprocess_only}'
+        '${postprocess_only}'
+        '${keep_var_ids}'
+        '${use_end}'
+        '${output_genotypes}'
+        '${ignore_merged_inputs}'
+        '${centroid_merging}'
+        '${clique_merging}'
+        '${allow_intrasample}'
+        '${normalize_type}'
+        '${leave_breakpoints}'
+        '${require_first_sample}'
+        ## Select-based switches live inside conditionals, so the inner param (conditional.param) carries the option value
+        '${normalize.normalize_chrs}'
+        #if $normalize.normalize_chrs and $normalize.chr_norm_file:
+            'chr_norm_file=${normalize.chr_norm_file}'
+        #end if
+
+        ## Required args
+        file_list='${vcffilelist}'
+        out_file='${out_vcf}'
+    ]]></command>
+    <configfiles>
+        <configfile name="vcffilelist">#
+#for $vcf_file in $vcf_list:
+${vcf_file}
+#end for
+        </configfile>
+    </configfiles>
+    <inputs>
+        <!-- TODO (future versions?):
+        add IrisSV support for post-processing. For now, just make it accessible as a separate tool and allow users to run it independently
+        -->
+        <!--
+        Input files: every selected VCF path is written on its own line of the vcffilelist configfile
+        -->
+        <param name="vcf_list" type="data" multiple="true" format="vcf" label="VCF file(s) to merge" help=""/>
+        <!--
+        Params: numeric options forwarded as key=value arguments
+        -->
+        <param argument="max_dist" type="integer" value="1000" min="0" label="The maximum distance variants can be apart when being merged" help="Setting both max_dist_linear and max_dist sets thresholds to minimum of max_dist and max_dist_linear * sv_length"/>
+        <param argument="min_dist" type="integer" value="-1" min="-1" label="The minimum distance threshold a variant can have when using max_dist_linear" />
+        <param argument="max_dist_linear" type="float" value="0." min="0.0" label="Make max_dist this proportion of the length of each variant" help="Setting both max_dist_linear and max_dist sets thresholds to minimum of max_dist and max_dist_linear * sv_length"/>
+        <param argument="kd_tree_norm" type="integer" value="2" min="1" label="The power to use in kd-tree distances (1 is Manhattan, 2 is Euclidean, etc.)" />
+        <param argument="min_seq_id" type="float" value="0." min="0." label="The minimum sequence identity for two insertions to be merged" />
+        <param argument="k_jaccard" type="integer" value="9" min="1" label="The kmer size to use when computing Jaccard similarity of insertions" />
+        <param argument="max_dup_length" type="integer" value="10000" min="0" label="The maximum length of duplication that can be converted to an insertion" />
+        <param argument="min_support" type="integer" value="1" min="1" label="The minimum number of callsets a variant must be in to be output" />
+        <param argument="spec_reads" type="integer" value="10" min="1" label="The minimum number of reads a variant needs to be in the specific callset" />
+        <param argument="spec_len" type="integer" value="30" min="1" label="The minimum length a variant needs to be in the specific callset" />
+        <!--
+        Flags: the truevalue of each boolean is the literal switch placed on the command line
+        -->
+        <param argument="--ignore_strand" type="boolean" checked="false" truevalue="--ignore_strand" falsevalue="" label="Allow variants with different strands to be merged" />
+        <param argument="--ignore_type" type="boolean" checked="false" truevalue="--ignore_type" falsevalue="" label="Allow variants with different types to be merged" />
+        <conditional name="dup_to_ins">
+            <param argument="--dup_to_ins" type="select" label="Convert duplications to insertions for SV merging and then convert them back?" help="Requires reference genome">
+                <option value="--dup_to_ins">Convert duplications to insertions for SV merging and then convert them back</option>
+                <option value="" selected="true">Don't convert duplications to insertions for SV merging</option>
+            </param>
+            <when value="--dup_to_ins">
+                <expand macro="reference"/>
+            </when>
+            <when value=""/>
+        </conditional>
+        <param argument="--mark_specific" type="boolean" checked="false" truevalue="--mark_specific" falsevalue="" label="Mark calls in the original VCF files that have enough support to be called specific" />
+        <param argument="--pre_normalize" type="boolean" checked="false" truevalue="--pre_normalize" falsevalue="" label="Run type normalization before merging" />
+        <param argument="--use_edit_dist" type="boolean" checked="false" truevalue="--use_edit_dist" falsevalue="" label="Use edit distance for comparing insertion sequences instead of Jaccard" />
+        <param argument="--preprocess_only" type="boolean" checked="false" truevalue="--preprocess_only" falsevalue="" label="Only run the preprocessing and not the actual merging or post-processing" />
+        <param argument="--postprocess_only" type="boolean" checked="false" truevalue="--postprocess_only" falsevalue="" label="Only run the postprocessing and not the actual merging or pre-processing" />
+        <param argument="--keep_var_ids" type="boolean" checked="false" truevalue="--keep_var_ids" falsevalue="" label="Don't change variant IDs (should only be used if input IDs are unique across samples)" />
+        <param argument="--use_end" type="boolean" checked="false" truevalue="--use_end" falsevalue="" label="Use the end coordinate as the second coordinate instead of the variant length" />
+        <param argument="--output_genotypes" type="boolean" checked="false" truevalue="--output_genotypes" falsevalue="" label="Print the genotypes of the consensus variants in all of the samples they came from" />
+        <param argument="--ignore_merged_inputs" type="boolean" checked="false" truevalue="--ignore_merged_inputs" falsevalue="" label="Ignore merging info such as support vectors which is already present in the inputs" />
+        <param argument="--centroid_merging" type="boolean" checked="false" truevalue="--centroid_merging" falsevalue="" label="Require every group to have a centroid which is within the distance threshold of each variant" />
+        <param argument="--clique_merging" type="boolean" checked="false" truevalue="--clique_merging" falsevalue="" label="Require every group to have each pair within it be mergeable" />
+        <param argument="--allow_intrasample" type="boolean" checked="false" truevalue="--allow_intrasample" falsevalue="" label="Allow variants in the same sample to be merged" />
+        <param argument="--normalize_type" type="boolean" checked="false" truevalue="--normalize_type" falsevalue="" label="Convert all variants to INS/DEL/DUP/INV/TRA" />
+        <param argument="--leave_breakpoints" type="boolean" checked="false" truevalue="--leave_breakpoints" falsevalue="" label="Leave breakpoints as they are even if they are inconsistent" />
+        <param argument="--require_first_sample" type="boolean" checked="false" truevalue="--require_first_sample" falsevalue="" label="Only output merged variants which include a variant from the first sample" />
+        <conditional name="normalize">
+            <param argument="--normalize_chrs" type="select" label="Whether to normalize chromosome names" help="(to NCBI standards - without 'chr' - by default)">
+                <option value="--normalize_chrs">Normalize chromosome names</option>
+                <option value="" selected="true">Don't normalize chromosome names</option>
+            </param>
+            <when value="--normalize_chrs">
+                <param name="chr_norm_file" type="data" format="txt,tsv" value="" label="A file containing chromosome name mappings" optional="true"/>
+            </when>
+            <when value=""/>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- Single output: the merged VCF that the command passes as out_file -->
+        <data name="out_vcf" format="vcf" label="${tool.name} on ${on_string}: Result"/>
+    </outputs>
+    <tests>
+        <!-- #1 default -->
+        <test expect_num_outputs="1">
+            <param name="vcf_list" value="a.vcf,b.vcf" ftype="vcf"/>
+            <output name="out_vcf" file="out1.vcf"/>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="vcf_list" value="c.vcf,d.vcf" ftype="vcf"/>
+            <conditional name="normalize">
+                <param name="normalize_chrs" value="--normalize_chrs"/>
+                <param name="chr_norm_file" value="chr_norm_file.txt"/>
+            </conditional>
+            <output name="out_vcf" file="out1.vcf"/>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="vcf_list" value="a.vcf,b.vcf" ftype="vcf"/>
+            <conditional name="dup_to_ins">
+                <param name="dup_to_ins" value="--dup_to_ins"/>
+                <conditional name="reference_source">
+                    <param name="reference_source_selector" value="history"/>
+                    <param name="ref_file" ftype="fasta" value="genome.fa"/>
+                </conditional>
+            </conditional>
+            <output name="out_vcf" file="out1.vcf"/>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="vcf_list" value="a.vcf,b.vcf" ftype="vcf"/>
+            <conditional name="dup_to_ins">
+                <param name="dup_to_ins" value="--dup_to_ins"/>
+                <conditional name="reference_source">
+                    <param name="reference_source_selector" value="cached"/>
+                    <param name="ref_file" value="jasmine"/>
+                </conditional>
+            </conditional>
+            <output name="out_vcf" file="out1.vcf"/>
+        </test>
+
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+**Input**
+
+- Multiple VCF files to be merged
+
+**Output**
+
+- Merged Variants (VCF)
+
+**References**
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Jan 20 19:49:40 2021 +0000
@@ -0,0 +1,66 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@TOOL_VERSION@">1.0.11</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">20.01</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">jasminesv</requirement>
+        </requirements>
+    </xml>
+    <xml name="version_command">
+        <version_command>jasmine</version_command>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">@online{jasmine,
+                author = {Melanie Kirsche},
+                title = {jasmine},
+                year = 2021,
+                url = {https://github.com/mkirsche/Jasmine},
+                urldate = {2021-01-13}
+            }</citation>
+        </citations>
+    </xml>
+    <!--
+    Command: shell fragment that symlinks the selected reference (history dataset, or the path field of a data table entry) to reference.fa
+    -->
+    <token name="@REF_FASTA@"><![CDATA[
+        #if $dup_to_ins.reference_source.reference_source_selector == 'history':
+            ln -f -s '$dup_to_ins.reference_source.ref_file' reference.fa &&
+        #else:
+            ln -f -s '$dup_to_ins.reference_source.ref_file.fields.path' reference.fa &&
+        #end if
+    ]]></token>
+    <!-- Reference genome selector: either an entry of the all_fasta data table or a FASTA dataset from the history -->
+    <xml name="reference">
+        <conditional name="reference_source">
+            <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
+                <option value="cached">Use a built-in genome</option>
+                <option value="history">Use a genome from history</option>
+            </param>
+            <when value="cached">
+                <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
+                    <options from_data_table="all_fasta">
+                        <filter type="sort_by" column="2"/>
+                        <validator type="no_options" message="No reference genomes are available"/>
+                    </options>
+                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference"/>
+            </when>
+        </conditional>
+    </xml>
+    <!--
+    Help: text tokens meant for substitution into a tool's help section
+    -->
+
+    <token name="@WID@"><![CDATA[
+*Jasmine*, or Jointly Accurate Sv Merging with Intersample Network Edges, is a tool used to merge structural variants (SVs) across samples. Each sample has a number of SV calls, consisting of position information (chromosome, start, end, length), type and strand information, and a number of other values. Jasmine represents the set of all SVs across samples as a network, and uses a modified minimum spanning forest algorithm to determine the best way of merging the variants such that each merged variant represents a set of analogous variants occurring in different samples.
+]]></token>
+    <token name="@REFERENCES@"><![CDATA[
+More information is available in the `github <https://github.com/mkirsche/Jasmine>`_.
+    ]]></token>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/a.vcf Wed Jan 20 19:49:40 2021 +0000 @@ -0,0 +1,15 @@ +##fileformat=VCFv4.1 +##contig=<ID=1,length=400> +##contig=<ID=2,length=400> +##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant"> +##INFO=<ID=RE,Number=1,Type=Integer,Description="read support"> +##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=IS_SPECIFIC,Number=1,Type=String,Description="Whether or not a variant has enough read support and length to be specific"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +1 100 1 CACGTACGTACGTACGTACGTACGTACTGACGTACGT C . PASS PRECISE;CHR2=1;END=136;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-36;STRANDS=+-;RE=12;IS_SPECIFIC=1 GT 1/1 +1 200 2 C CACGTACGTACGTACGTACGTACGTACTGACGTACGT . PASS PRECISE;CHR2=1;END=200;SVTYPE=INS;SUPTYPE=AL;SVLEN=36;STRANDS=+-;RE=10;IS_SPECIFIC=1 GT 1/1 +1 300 3 N ]2:1000000]N . PASS PRECISE;CHR2=2;END=3000000;SVTYPE=BND;SUPTYPE=AL;SVLEN=1;STRANDS=-+;RE=15;IS_SPECIFIC=1 GT 1/1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/all_fasta.loc Wed Jan 20 19:49:40 2021 +0000 @@ -0,0 +1,19 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# +jasmine jasmine jasmine ${__HERE__}/genome.fa \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/b.vcf Wed Jan 20 19:49:40 2021 +0000 @@ -0,0 +1,15 @@ +##fileformat=VCFv4.1 +##contig=<ID=1,length=400> +##contig=<ID=2,length=400> +##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant"> +##INFO=<ID=RE,Number=1,Type=Integer,Description="read support"> +##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=IS_SPECIFIC,Number=1,Type=String,Description="Whether or not a variant has enough read support and length to be specific"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +1 100 1 CACGTACGTACGTACGTACGTACGTACTGACGT C . PASS PRECISE;CHR2=1;END=132;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-32;STRANDS=+-;RE=12;IS_SPECIFIC=1 GT 1/1 +1 205 2 C CACGTACGTACGTACGTACGTACGTACTGACGTACGTACGT . PASS PRECISE;CHR2=1;END=205;SVTYPE=INS;SUPTYPE=AL;SVLEN=40;STRANDS=+-;RE=10;IS_SPECIFIC=1 GT 1/1 +1 300 3 N ]2:1000000]N . PASS PRECISE;CHR2=2;END=3000000;SVTYPE=BND;SUPTYPE=AL;SVLEN=1;STRANDS=-+;RE=15;IS_SPECIFIC=1 GT 1/1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/c.vcf Wed Jan 20 19:49:40 2021 +0000 @@ -0,0 +1,15 @@ +##fileformat=VCFv4.1 +##contig=<ID=1,length=400> +##contig=<ID=2,length=400> +##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant"> +##INFO=<ID=RE,Number=1,Type=Integer,Description="read support"> +##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=IS_SPECIFIC,Number=1,Type=String,Description="Whether or not a variant has enough read support and length to be specific"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +chr1 100 1 CACGTACGTACGTACGTACGTACGTACTGACGTACGT C . PASS PRECISE;CHR2=1;END=136;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-36;STRANDS=+-;RE=12;IS_SPECIFIC=1 GT 1/1 +chr1 200 2 C CACGTACGTACGTACGTACGTACGTACTGACGTACGT . PASS PRECISE;CHR2=1;END=200;SVTYPE=INS;SUPTYPE=AL;SVLEN=36;STRANDS=+-;RE=10;IS_SPECIFIC=1 GT 1/1 +chr1 300 3 N ]2:1000000]N . PASS PRECISE;CHR2=2;END=3000000;SVTYPE=BND;SUPTYPE=AL;SVLEN=1;STRANDS=-+;RE=15;IS_SPECIFIC=1 GT 1/1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/chr_norm_file.txt Wed Jan 20 19:49:40 2021 +0000 @@ -0,0 +1,2 @@ +chr1 1 +chr2 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/d.vcf Wed Jan 20 19:49:40 2021 +0000 @@ -0,0 +1,15 @@ +##fileformat=VCFv4.1 +##contig=<ID=1,length=400> +##contig=<ID=2,length=400> +##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant"> +##INFO=<ID=RE,Number=1,Type=Integer,Description="read support"> +##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=IS_SPECIFIC,Number=1,Type=String,Description="Whether or not a variant has enough read support and length to be specific"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +chr1 100 1 CACGTACGTACGTACGTACGTACGTACTGACGT C . PASS PRECISE;CHR2=1;END=132;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-32;STRANDS=+-;RE=12;IS_SPECIFIC=1 GT 1/1 +chr1 205 2 C CACGTACGTACGTACGTACGTACGTACTGACGTACGTACGT . PASS PRECISE;CHR2=1;END=205;SVTYPE=INS;SUPTYPE=AL;SVLEN=40;STRANDS=+-;RE=10;IS_SPECIFIC=1 GT 1/1 +chr1 300 3 N ]2:1000000]N . PASS PRECISE;CHR2=2;END=3000000;SVTYPE=BND;SUPTYPE=AL;SVLEN=1;STRANDS=-+;RE=15;IS_SPECIFIC=1 GT 1/1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.fa Wed Jan 20 19:49:40 2021 +0000 @@ -0,0 +1,20 @@ +>1 +TAACCCTAACACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC +TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCAA +CACGTACGTACGTACGTACGTACGTACTGACGTACGT +AACCCTAACCCAA +CCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAAC +CCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCT +CACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTAAACCC +TAACCCTAACCCTAACCCTAACCCTAACCCCAACCCCAACCCCAACCCCA +ACCCCAACCCCAACCCTAACCCCTAACCCTAACCCTAACCCTACCCTAAC +>2 +TAACCCTAACACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC +TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCAA +CACGTACGTACGTACGTACGTACGTACTGACGTACGT +AACCCTAACCCAA +CCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAAC +CCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCT +CACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTAAACCC +TAACCCTAACCCTAACCCTAACCCTAACCCCAACCCCAACCCCAACCCCA +ACCCCAACCCCAACCCTAACCCCTAACCCTAACCCTAACCCTACCCTAAC \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out1.vcf Wed Jan 20 19:49:40 2021 +0000 @@ -0,0 +1,27 @@ +##fileformat=VCFv4.1 +##contig=<ID=1,length=400> +##contig=<ID=2,length=400> +##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant"> +##INFO=<ID=RE,Number=1,Type=Integer,Description="read support"> +##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=IS_SPECIFIC,Number=1,Type=String,Description="Whether or not a variant has enough read support and length to be specific"> +##INFO=<ID=SUPP_VEC,Number=1,Type=String,Description="Vector of supporting samples"> +##INFO=<ID=SUPP_VEC_EXT,Number=1,Type=String,Description="Vector of supporting samples, potentially extended across multiple merges"> +##INFO=<ID=SUPP,Number=1,Type=String,Description="Number of samples supporting the variant"> +##INFO=<ID=SUPP_EXT,Number=1,Type=String,Description="Number of samples supporting the variant, potentially extended across multiple merges"> +##INFO=<ID=IDLIST,Number=.,Type=String,Description="Variant IDs of variants merged to make this call (at most 1 per sample)"> +##INFO=<ID=IDLIST_EXT,Number=.,Type=String,Description="Variant IDs of variants merged, potentially extended across multiple merges"> +##INFO=<ID=SVMETHOD,Number=1,Type=String,Description=""> +##INFO=<ID=STARTVARIANCE,Number=1,Type=String,Description="Variance of start position for variants merged into this one"> +##INFO=<ID=ENDVARIANCE,Number=1,Type=String,Description="Variance of end position for variants merged into this one"> 
+##INFO=<ID=AVG_START,Number=1,Type=String,Description="Average start position for variants merged into this one"> +##INFO=<ID=AVG_END,Number=1,Type=String,Description="Average end position for variants merged into this one"> +##INFO=<ID=AVG_LEN,Number=1,Type=String,Description="Average length for variants merged into this one"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +1 100 0_1 CACGTACGTACGTACGTACGTACGTACTGACGTACGT C . PASS PRECISE;CHR2=1;END=136;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-36;STRANDS=+-;RE=12;IS_SPECIFIC=1;STARTVARIANCE=0.000000;ENDVARIANCE=4.000000;AVG_LEN=-34.000000;AVG_START=100.000000;AVG_END=134.000000;SUPP_VEC_EXT=11;IDLIST_EXT=1,1;SUPP_EXT=2;SUPP_VEC=11;SUPP=2;SVMETHOD=JASMINE;IDLIST=1,1 GT 1/1 +1 200 0_2 C CACGTACGTACGTACGTACGTACGTACTGACGTACGT . PASS PRECISE;CHR2=1;END=200;SVTYPE=INS;SUPTYPE=AL;SVLEN=36;STRANDS=+-;RE=10;IS_SPECIFIC=1;STARTVARIANCE=6.250000;ENDVARIANCE=6.250000;AVG_LEN=38.000000;AVG_START=202.500000;AVG_END=202.500000;SUPP_VEC_EXT=11;IDLIST_EXT=2,2;SUPP_EXT=2;SUPP_VEC=11;SUPP=2;SVMETHOD=JASMINE;IDLIST=2,2 GT 1/1 +1 300 0_3 N ]2:1000000]N . PASS PRECISE;CHR2=2;END=3000000;SVTYPE=BND;SUPTYPE=AL;SVLEN=1;STRANDS=-+;RE=15;IS_SPECIFIC=1;STARTVARIANCE=0.000000;ENDVARIANCE=0.000000;AVG_LEN=1.000000;AVG_START=300.000000;AVG_END=3000000.000000;SUPP_VEC_EXT=11;IDLIST_EXT=3,3;SUPP_EXT=2;SUPP_VEC=11;SUPP=2;SVMETHOD=JASMINE;IDLIST=3,3 GT 1/1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data/all_fasta.loc.sample Wed Jan 20 19:49:40 2021 +0000 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Jan 20 19:49:40 2021 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- all_fasta: where every FASTA file under the genome directory is located -->
+    <table comment_char="#" name="all_fasta">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc"/>
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Wed Jan 20 19:49:40 2021 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- all_fasta: where every FASTA file under the genome directory is located -->
+    <table comment_char="#" name="all_fasta">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc"/>
+    </table>
+</tables>
\ No newline at end of file