Mercurial > repos > iuc > fermikit_variants
changeset 0:44519e762f16 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fermikit commit 16dcfc0fb84fad80fcf18417ae46c5499c96147a
author | iuc |
---|---|
date | Thu, 05 Jan 2017 08:35:48 -0500 |
parents | |
children | ed056d243455 |
files | all_fasta.loc.sample fermikit_run_calling.xml test-data/aligned_contigs.bam test-data/flt.vcf test-data/small.fa.gz test-data/sv.vcf test-data/test.fastq.gz test-data/unitigs.gz tool_data_table_conf.xml.sample |
diffstat | 9 files changed, 150 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/all_fasta.loc.sample Thu Jan 05 08:35:48 2017 -0500 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fermikit_run_calling.xml Thu Jan 05 08:35:48 2017 -0500 @@ -0,0 +1,58 @@ +<tool id="fermikit_variants" name="fermikit-variants" version="0.14.dev1"> + <description>call variants from genome-aligned contigs</description> + <requirements> + <requirement type="package" version="0.14.dev1">fermikit</requirement> + <requirement type="package" version="0.6.5">sambamba</requirement> + </requirements> + <command detect_errors="aggressive"><![CDATA[ + #import re + #set escaped_element_identifier = re.sub('[^\w\-\s]', '_', str($bam.element_identifier)) + #set ref = $reference_genome.fasta_item.fields.path if $reference_genome.reference_genome_source == "reference" else $reference_genome.history_item + export ROOT=\$(dirname \$(type -P k8)) && + ln -f -s '$bam' '$escaped_element_identifier' && + htsbox pileup -cuf '$ref' '$escaped_element_identifier' | gzip -1 > raw.vcf.gz && + k8 "\$ROOT"/hapdip.js deovlp raw.vcf.gz | k8 "\$ROOT"/hapdip.js anno | gzip -1 > tmp.vcf.gz 2> flt.vcf.log && + k8 "\$ROOT"/hapdip.js filter -q3 tmp.vcf.gz > flt.vcf 2>> flt.vcf.log && + htsbox abreak -bcuf '$ref' <(sambamba sort -n -o /dev/stdout '$escaped_element_identifier') > sv.vcf + ]]></command> + <inputs> + <param name="bam" type="data" label="aligned contigs" help="To generate aligned contigs align fermi2 contigs with BWA mem options -B9 -O16 -L5" format="bam"/> + <conditional name="reference_genome" label="Reference genome to call variants against"> + <param name="reference_genome_source" type="select"> + <option value="reference">Use a built-in genome to call variants</option> + <option value="history">Use a genome from history to call variants</option> + </param> + <when value="history"> + <param format="fasta" label="Select a reference genome" name="history_item" type="data" /> + </when> + <when value="reference"> + <param label="Select a reference genome" name="fasta_item" type="select"> + <options from_data_table="all_fasta"> + <filter 
column="2" type="sort_by"/> + </options> + </param> + </when> + </conditional> + </inputs> + <outputs> + <data name="structural_variants" format="vcf" label="fermikit SV on ${on_string}" from_work_dir="sv.vcf"/> + <data name="snps_indels" format="vcf" label="fermikit SNPs and short INDELs on ${on_string}" from_work_dir="flt.vcf"/> + </outputs> + <tests> + <test> + <param name="reference_genome_source" value="history"/> + <param name="history_item" value="small.fa.gz" ftype="fasta"/> + <param name="bam" value="aligned_contigs.bam"/> + <output name="structural_variants" file="sv.vcf" lines_diff="2"/> + <output name="snps_indels" file="flt.vcf" lines_diff="2"/> + </test> + </tests> + <help><![CDATA[ + +FermiKit is a de novo assembly-based variant calling pipeline for deep Illumina +resequencing data. This Galaxy wrapper can be used to call variants from contigs +generated by fermi2 that have subsequently been aligned to a reference genome +using bwa (options -B9 -O16 -L5 or -x intractg). + +]]></help> <citations> <citation +type="doi">10.1093/bioinformatics/btv440</citation> </citations> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/flt.vcf Thu Jan 05 08:35:48 2017 -0500 @@ -0,0 +1,49 @@ +##fileformat=VCFv4.1 +##source=htsbox-pileup-r327 +##reference=/tmp/tmpIcvwsb/files/000/dataset_2.dat +##contig=<ID=11_1910000_1940000,length=30001> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> +##INFO=<ID=_DP,Number=1,Type=Integer,Description="Raw read depth"> +##INFO=<ID=_DS,Number=1,Type=Integer,Description="min{alt_DP_on_forward, alt_DP_on_reverse}"> +##INFO=<ID=_AB,Number=1,Type=Integer,Description="Percentage of non-reference reads"> +##INFO=<ID=_FS,Number=1,Type=Integer,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias"> +##FILTER=<ID=DPhigh,Description="High read depth: _DP>62.82"> +##FILTER=<ID=DPlow,Description="Low read depth: _DP<3"> +##FILTER=<ID=FShigh,Description="Large Fisher-Strand bias: _FS>30"> +##FILTER=<ID=ABlow,Description="Low fraction of non-reference reads: _AB<30 at SNPs or _AB<30 at INDELs"> +##FILTER=<ID=DSlow,Description="Low double-strand support at SNPs: _DS<1"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT aligned_contigs_bam +11_1910000_1940000 636 . c T 25 . _DP=39;_AB=64 GT:AD 1/0:14,25 +11_1910000_1940000 708 . t C 40 . _DP=40;_AB=100 GT:AD 1/1:0,40 +11_1910000_1940000 1116 . t C 43 . _DP=43;_AB=100 GT:AD 1/1:0,43 +11_1910000_1940000 1891 . t C 44 . _DP=44;_AB=100 GT:AD 1/1:0,44 +11_1910000_1940000 2297 . a G 43 . _DP=43;_AB=100 GT:AD 1/1:0,43 +11_1910000_1940000 2727 . a G 42 . _DP=42;_AB=100 GT:AD 1/1:0,42 +11_1910000_1940000 3378 . a G 50 . _DP=50;_AB=100 GT:AD 1/1:0,50 +11_1910000_1940000 4140 . c T 21 . _DP=36;_AB=58 GT:AD 1/0:15,21 +11_1910000_1940000 4820 . g A 35 . _DP=35;_AB=100 GT:AD 1/1:0,35 +11_1910000_1940000 4860 . t G 36 . _DP=36;_AB=100 GT:AD 1/1:0,36 +11_1910000_1940000 4976 . g A 33 . 
_DP=33;_AB=100 GT:AD 1/1:0,33 +11_1910000_1940000 5455 . a AGT 12 . _DP=25;_AB=48 GT:AD 0/1:13,12 +11_1910000_1940000 5559 . g T 39 . _DP=39;_AB=100 GT:AD 1/1:0,39 +11_1910000_1940000 6369 . ct C 8 ABlow _DP=42;_AB=19 GT:AD 0/1:34,8 +11_1910000_1940000 6654 . g A 43 . _DP=43;_AB=100 GT:AD 1/1:0,43 +11_1910000_1940000 7873 . g A 45 . _DP=45;_AB=100 GT:AD 1/1:0,45 +11_1910000_1940000 8084 . t C 54 . _DP=54;_AB=100 GT:AD 1/1:0,54 +11_1910000_1940000 10894 . t G 40 . _DP=40;_AB=100 GT:AD 1/1:0,40 +11_1910000_1940000 12259 . a G 35 . _DP=35;_AB=100 GT:AD 1/1:0,35 +11_1910000_1940000 15695 . g A 36 . _DP=36;_AB=100 GT:AD 1/1:0,36 +11_1910000_1940000 16353 . ctt C 14 . _DP=14;_AB=100 GT:AD 1/1:0,14 +11_1910000_1940000 20714 . c T 24 . _DP=42;_AB=57 GT:AD 1/0:18,24 +11_1910000_1940000 24531 . t G 32 . _DP=32;_AB=100 GT:AD 1/1:0,32 +11_1910000_1940000 24546 . gt G 25 . _DP=25;_AB=100 GT:AD 1/1:0,25 +11_1910000_1940000 25710 . t TTG 17 . _DP=46;_AB=37 GT:AD 0/1:29,17 +11_1910000_1940000 25813 . c CTG,CTGTG 13 . _DP=33;_AB=70 GT:AD 1/0:10,13,10 +11_1910000_1940000 26084 . ctg C,CTGTGTG 17 . _DP=36;_AB=75 GT:AD 1/2:9,17,10 +11_1910000_1940000 26212 . g A 29 . _DP=55;_AB=53 GT:AD 1/0:26,29 +11_1910000_1940000 26370 . c CTG,CTGTGTG 12 . _DP=34;_AB=68 GT:AD 1/0:11,12,11 +11_1910000_1940000 26521 . a G 30 . _DP=30;_AB=100 GT:AD 1/1:0,30 +11_1910000_1940000 26713 . a G 32 . _DP=32;_AB=100 GT:AD 1/1:0,32 +11_1910000_1940000 27735 . tc T 41 . _DP=41;_AB=100 GT:AD 1/1:0,41 +11_1910000_1940000 28524 . c A 18 . _DP=38;_AB=47 GT:AD 0/1:20,18
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sv.vcf Thu Jan 05 08:35:48 2017 -0500 @@ -0,0 +1,16 @@ +##fileformat=VCFv4.1 +##source=htsbox-abreak-r327 +##reference=/tmp/tmpIcvwsb/files/000/dataset_2.dat +##contig=<ID=11_1910000_1940000,length=30001> +##ALT=<ID=DEL,Description="Deletion"> +##ALT=<ID=INS,Description="Insertion"> +##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="SV length"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End coordinate of this variant"> +##INFO=<ID=QGAP,Number=1,Type=Integer,Description="Length of gap on the query sequence"> +##INFO=<ID=MINMAPQ,Number=1,Type=Integer,Description="Min flanking mapping quality"> +##INFO=<ID=MINSC,Number=1,Type=Integer,Description="Min flanking alignment score"> +##INFO=<ID=MINTIPQ,Number=1,Type=Integer,Description="Min quality/depth flanking the break point"> +##FILTER=<ID=LowSupp,Description="MINTIPQ < 10"> +#CHROM POS ID REF ALT QUAL FILTER INFO +11_1910000_1940000 5276 . C <DEL> 30 . SVTYPE=DEL;END=26956;SVLEN=21678;QGAP=3;MINMAPQ=60;MINSC=793;MINTIPQ=23
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Jan 05 08:35:48 2017 -0500 @@ -0,0 +1,9 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/all_fasta.loc" /> + </table> +</tables> +