Mercurial > repos > rdaveau > gfap
diff gfapts/gfap_r1.0_known_var_finder.pl @ 0:f753b30013e6 draft
Uploaded
author | rdaveau |
---|---|
date | Fri, 29 Jun 2012 10:20:55 -0400 |
parents | |
children | 028f435b6cfb |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gfapts/gfap_r1.0_known_var_finder.pl Fri Jun 29 10:20:55 2012 -0400 @@ -0,0 +1,98 @@ +#!/usr/bin/perl + +use strict; +use lib 'inc/perlmod'; +use ngsutil qw[ :DEFAULT &varscan ]; +use warnings FATAL => qw[ numeric uninitialized ]; +use File::Basename; +use Getopt::Long; + +my($varfile, $buildver, $outdir, $dir_1000g, $dir_dbsnp, $dir_cosmic, $release_1000g, $release_dbsnp, $release_cosmic, $outfile, $k, @buffer, @varlist, %opts, %varlist); + +GetOptions(\%opts, "varfile=s", "buildver=s", "outdir=s", "dir_1000g=s", "dir_dbsnp=s", "dir_cosmic=s", "release_1000g=s", "release_dbsnp=s", "release_cosmic=s", "outfile=s"); +$varfile = $opts{varfile}; +$buildver = $opts{buildver}; +$outdir = $opts{outdir}; +$dir_1000g = $opts{dir_1000g}; +$dir_dbsnp = $opts{dir_dbsnp}; +$dir_cosmic = $opts{dir_cosmic}; +$release_1000g = $opts{release_1000g}; +$release_dbsnp = $opts{release_dbsnp}; +$release_cosmic = $opts{release_cosmic}; +$outfile = $opts{outfile}; + +my $fname = readlink($varfile) || $varfile; +$fname = basename($fname); + +my %k=( + '1000g' => { + 'dir' => $dir_1000g, 'release' => $release_1000g, 'value' => join(':', ('0.00000')x5), 'header' => join(':', 'AF_ALL', 'AF_AFR', 'AF_AMR', 'AF_ASN', 'AF_EUR') + }, 'dbsnp' => { + 'dir' => $dir_dbsnp, 'release' => $release_dbsnp, 'value' => join(':', ('na')x2), 'header' => join(':', 'rs', 'dbsnp') + }, 'cosmic_var' => { + 'dir' => $dir_cosmic, 'release' => $release_cosmic, 'value' => join(':', '0.00000', 'na'), 'header' => join(':', 'AF_COS', 'cid') + } +); + +my %legend=( + 'chr' => 'chromosome identifier', + 'start' => "${buildver} 1-based start position", + 'end' => "${buildver} 1-based end position", + 'ref' => 'reference allele', + 'alt' => 'alternate allele', + 'QC' => 'Phred-scaled call quality', + 'NRF' => '#reads consistent w/ the reference allele on the F-strand', + 'NRR' => '#reads consistent w/ the reference allele on the R-strand', + 'NAF' => '#reads consistent w/ the alternate allele on the F-strand', + 'NAR' => '#reads consistent w/ the alternate allele on the R-strand', + 'DP' => 'total #reads in call ie. NRF+NRR+NAF+NAR', + 'AD' => 'total #reads consistent w/ the alternate allele ie. NAF+NAR', + 'AF' => 'alternate allele ratio ie. AD/DP', + 'VCF.FILTER' => 'FILTER field from the input vcf file', + 'DPT.FILTER' => 'check for heterogeneous depth in substituted blocks', + 'VAR.FILTER' => 'GFAP default FILTER to discriminate between TP and FP variants', + 'P.str' => 'NRF+NAF vs. NRR+NAR binomial test P-value ie. total strand bias', + 'P.ref' => 'NRF vs. NRR binomial test P-value ie. reference allele strand bias', + 'P.alt' => 'NAF vs. NAR binomial test P-value ie. alternate allele strand bias', + 'AF_ALL' => "global AF in ${release_1000g} 1000g data", + 'AF_AFR' => "AF in AFR ${release_1000g} 1000g data", + 'AF_AMR' => "AF in AMR ${release_1000g} 1000g data", + 'AF_ASN' => "AF in ASN ${release_1000g} 1000g data", + 'AF_EUR' => "AF in EUR ${release_1000g} 1000g data", + 'AF_COS' => "AF in ${release_cosmic} cosmic data", + 'rs' => "dbsnp rs identifier(s) from ${release_dbsnp} release", + 'dbsnp' => "dbsnp build version(s) from ${release_dbsnp} release", + 'cid' => "cosmic mutation identifier from ${release_cosmic} release" +); +my @header=('chr', 'start', 'end', 'ref', 'alt', 'DPT.FILTER', 'QC', 'NRF', 'NRR', 'NAF', 'NAR', 'VCF.FILTER', 'P.str', 'P.ref', 'P.alt', 'DP', 'AD', 'AF', 'VAR.FILTER'); +my @k=qw[ 1000g dbsnp cosmic_var ]; + +open IN, "<$varfile" or die $!; +while(<IN>){ + chomp; + @buffer=split /\s+/, $_; + $buffer[0]=~s/^chr(.+)$/$1/; + push @varlist, ($k=join(':', @buffer[0..2])); + shift(@buffer) for 0..2; + $varlist{$k}->{$_}=shift(@buffer) foreach qw[ ref alt ]; + $varlist{$k}->{cov}=join(':', (($buffer[0] eq 'unk')?'SKIP':'PASS'), @buffer[1..$#buffer]); + } +close IN; + +foreach $k (@k){ + push @header, split(/:/, $k{$k}->{header}); + varscan($k, $k{$k}->{file}, \%varlist); + } + +my @idx=(0..4,7..10,15..17,6,12..14,11,5,18..23,26..27,24..25); +open OUT, ">${outdir}/${fname}.dbi" or die $!; +print OUT '#', join(' = ', $_, $legend{$_}), "\n" foreach @header[@idx]; +print OUT '#', join("\t", @header[@idx]), "\n"; +foreach $k (@varlist){ + @buffer=(split(/:/, 'chr'.$k), $varlist{$k}->{ref}, $varlist{$k}->{alt}); + push @buffer, split(/:/, ($varlist{$k}->{$_} || $k{$_}->{value})) foreach ('cov', @k); + print OUT join("\t", @buffer[@idx]), "\n"; + } +close OUT; + +system "rm $outfile; ln -s ${outdir}/${fname}.dbi $outfile" and die $!; \ No newline at end of file