# HG changeset patch # User nml # Date 1566923447 14400 # Node ID c27b4346352f7af925c3fc80b4695fe0549f4446 "planemo upload for repository https://github.com/phac-nml/snvphyl-galaxy commit 90a172f1fc12b9c4d73f4c924a8c0c5a559589d0" diff -r 000000000000 -r c27b4346352f filter-density.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter-density.xml Tue Aug 27 12:30:47 2019 -0400 @@ -0,0 +1,52 @@ + + Identify high density positions within individual genomes + + snvphyl-tools + + + bcftools plugin filter_snv_density $vcf -O b -o $filtered_bcf -- --filename $vcf --region_file $out + #if $window_size: + --window_size $window_size + #end if + #if $threshold: + --threshold $threshold + #end if + + + + + + + + + + + + + + + + + + + + +What it does +============ + +This script will identify all high density SNV regions in an isolate genome, marking them as filtered-density in the associated bcf, if desired. + + +Usage +===== + +**Parameters** + - vcf - A vcf file to be analyzed for SNV density. + - threshold - The threshold distance between SNVs in order for them to be considered 'high density' + - window_size - The size of the window, in base pairs, that will be examined at any given time to calculate density. + + + + + + diff -r 000000000000 -r c27b4346352f test-data/1.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.vcf Tue Aug 27 12:30:47 2019 -0400 @@ -0,0 +1,38 @@ +##fileformat=VCFv4.2 +##FILTER= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##FILTER= +##INFO= +##FORMAT= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT AAA0001 BBB0002 +2 101 . ATTTTTTTTTTTTT ATTTTTTTTTTTTTTT 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1 +2 114 . TC TTCC,TTC 999 PASS INDEL;AN=4;AC=2,2 GT:DP 1/2:1 1/2:1 +2 115 . C T 999 PASS INDEL;AN=4;AC=4 GT:DP 1/1:1 1/1:1 +20 3 . G CT 999 PASS INDEL;AN=4;AC=2 GT 0/1 0/1 +20 3 . GATG GACT 999 PASS INDEL;AN=4;AC=2 GT 1/0 1/0 +20 5 . 
TGGG TAC,TG,TGGGG,AC . PASS INDEL;AN=4;AC=2,2,0,0 GT:PL:DP 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1 1/2:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15:1 +20 59 . AG . 999 PASS AN=4 GT:PL:DP 0/0:0:4 0/0:0:4 +20 80 . CACAG CACAT 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13 +20 81 . A C 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13 +20 95 . TCACCG ACACCG 999 PASS AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13 +20 95 . TCACCG AAAAAA 999 Test AN=4;AC=2 GT:PL:DP 0/1:255,0,255:13 0/1:255,0,255:13 +20 273 . CAAAAAAAAAAAAAAAAAAAAA CAAAAAAAAAAAAAAAAAAAAAAA,CAAAAAAAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=4;AC=2,2 GT:PL:DP 1/2:0,3,5,3,5,5:1 1/2:0,3,5,3,5,5:1 +20 274 . AAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0 +20 278 . AAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAA 999 PASS INDEL;AN=0;AC=0 GT:PL:DP ./.:0,0,0:0 ./.:0,0,0:0 +3 10 . GTGGAC GTGGACACAC,GTGGACAC,GTGGACACACAC,GTGG,GTGGACACACACAC,ATGGACACACAC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0 +3 15 . CACA CAC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0 +4 21 . ATTTTTTTTTTTTTTTC ATTTTTTTTTTTTTTC,ATTTTTTTTTTTTTTTT,ATTTTTTTTTTTTTTTTC 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0 +5 22 . A AGA 999 PASS INDEL;AN=0 GT:DP ./.:0 ./.:0 diff -r 000000000000 -r c27b4346352f test-data/bcf1.bcf Binary file test-data/bcf1.bcf has changed diff -r 000000000000 -r c27b4346352f test-data/density_regions.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/density_regions.txt Tue Aug 27 12:30:47 2019 -0400 @@ -0,0 +1,5 @@ +2 101 115 +20 3 95 +20 273 278 +3 10 15 +#Calculation and writing of high density regions has completed.