Mercurial > repos > nml > filter_density
changeset 0:c27b4346352f draft
"planemo upload for repository https://github.com/phac-nml/snvphyl-galaxy commit 90a172f1fc12b9c4d73f4c924a8c0c5a559589d0"
author | nml |
---|---|
date | Tue, 27 Aug 2019 12:30:47 -0400 |
parents | |
children | c9a43abcd993 |
files | filter-density.xml test-data/1.vcf test-data/bcf1.bcf test-data/density_regions.txt |
diffstat | 4 files changed, 95 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter-density.xml Tue Aug 27 12:30:47 2019 -0400
@@ -0,0 +1,52 @@
+<tool id="filterdensity" name="Filter Density" version="1.8.2">
+    <description>Identify high density positions within individual genomes</description>
+    <requirements>
+        <requirement type="package" version="1.8.2">snvphyl-tools</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        bcftools plugin filter_snv_density '$vcf' -O b -o '$filtered_bcf' -- --filename '$vcf' --region_file '$out'
+        #if $window_size:
+            --window_size $window_size
+        #end if
+        #if $threshold:
+            --threshold $threshold
+        #end if
+    ]]></command>
+    <inputs>
+        <param name="vcf" type="data" label="Input vcf file" format="vcf" />
+        <param name="window_size" type="integer" label="Size of search window" optional="true"/>
+        <param name="threshold" type="integer" label="Density threshold cutoff" optional="true"/>
+    </inputs>
+    <outputs>
+        <data format="txt" name="out" label="High density regions"/>
+        <data format="bcf" name="filtered_bcf"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="threshold" value="2"/>
+            <param name="window_size" value="100"/>
+            <param name="vcf" value="1.vcf"/>
+            <output name="out" file="density_regions.txt"/>
+        </test>
+    </tests>
+    <!-- NOTE(review): the test above compares only the region file ("out", written via region_file); "filtered_bcf" (bcftools -o) is not checked. -->
+    <help>
+What it does
+============
+
+This script will identify all high density SNV regions in an isolate genome, marking them as filtered-density in the associated bcf, if desired.
+
+
+Usage
+=====
+
+**Parameters**
+ - vcf - A vcf file to be analyzed for SNV density.
+ - threshold - The threshold distance between SNVs in order for them to be considered 'high density'
+ - window_size - The size of the window, in base pairs, that will be examined at any given time to calculate density.
+    </help>
+    <!-- No citations are declared for this tool. -->
+    <citations>
+    </citations>
+
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.vcf Tue Aug 27 12:30:47 2019 -0400 @@ -0,0 +1,38 @@ +##fileformat=VCFv4.2 +##FILTER=<ID=PASS,Description="All filters passed"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL."> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled likelihood"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Depth"> +##contig=<ID=1,length=2147483647> +##contig=<ID=2,length=2147483647> +##contig=<ID=3,length=2147483647> +##contig=<ID=4,length=2147483647> +##contig=<ID=5,length=2147483647> +##contig=<ID=20,length=2147483647> +##FILTER=<ID=Test,Description="Test filter"> +##INFO=<ID=DP2,Number=2,Type=Integer,Description="Depth"> +##FORMAT=<ID=DP2,Number=2,Type=Integer,Description="Depth"> +##contig=<ID=12,length=123456789> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT AAA0001 BBB0002 +2 101 . ATTTTTTTTTTTTT ATTTTTTTTTTTTTTT 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1 +2 114 . TC TTCC,TTC 999 PASS INDEL;AN=4;AC=2,2 GT:DP 1/2:1 1/2:1 +2 115 . C T 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1 +20 3 . G CT 999 PASS INDEL;AN=4;AC=2 GT 0/1 0/1 +20 3 . GATG GACT 999 PASS INDEL;AN=4;AC=2 GT 1/0 1/0 +20 5 . TGGG TAC,TG,TGGGG,AC . PASS INDEL;AN=4;AC=2,2,0,0 GT:PL:DP 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1 +20 59 . AG . 999 PASS AN=4 GT:PL:DP 0/0:0:4 0/0:0:4 +20 80 . CACAG CACAT 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13 +20 81 . A C 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13 +20 95 . TCACCG ACACCG 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13 +20 95 . 
TCACCG AAAAAA 999 Test AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13 +20 273 . CAAAAAAAAAAAAAAAAAAAAA CAAAAAAAAAAAAAAAAAAAAAAA,CAAAAAAAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=4;AC=2,2 GT:PL:DP 1/2:0,3,5,3,5,5:1 1/2:0,3,5,3,5,5:1 +20 274 . AAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0 +20 278 . AAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0 +3 10 . GTGGAC GTGGACACAC,GTGGACAC,GTGGACACACAC,GTGG,GTGGACACACACAC,ATGGACACACAC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0 +3 15 . CACA CAC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0 +4 21 . ATTTTTTTTTTTTTTTC ATTTTTTTTTTTTTTC,ATTTTTTTTTTTTTTTT,ATTTTTTTTTTTTTTTTC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0 +5 22 . A AGA 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0