Mercurial > repos > rdaveau > gfap
diff gfapts/gfap_r1.0_samvcf_data_parser.pl @ 0:f753b30013e6 draft
Uploaded
author | rdaveau |
---|---|
date | Fri, 29 Jun 2012 10:20:55 -0400 |
parents | |
children | 028f435b6cfb |
line wrap: on
line diff
#!/usr/bin/perl

# gfap_r1.0_samvcf_data_parser.pl
#
# Turns a VCF variant file into the tabular input consumed by
# inc/R/samvcf_data_parser.R:
#   1. convert the VCF with ANNOVAR's convert2annovar.pl (-includeinfo);
#   2. split the converted records into one temporary file per chromosome
#      (chr1..chr22, chrX, chrY, chrM), expanding calls whose reference and
#      alternate alleles are both longer than one character through
#      ngsutil::explode_varcall;
#   3. sort each chromosome file numerically on columns 2 and 3 and merge
#      runs of adjacent gap-free records into a single record;
#   4. concatenate the per-chromosome results in karyotype order and hand
#      the merged file to the R script;
#   5. remove the temporary files and symlink <outdir>/<fname>.var to
#      --outfile (the .var file is presumably written by the R script --
#      confirm against samvcf_data_parser.R).
#
# Usage:
#   gfap_r1.0_samvcf_data_parser.pl --varfile FILE --outdir DIR --outfile FILE

use strict;
use lib 'inc/perlmod';
use ngsutil qw[ :DEFAULT &explode_varcall ];
use warnings FATAL => qw[ numeric uninitialized ];
use List::Util qw[ sum min max ];
use File::Basename;
use Getopt::Long;

#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# PATH TO YOUR R-bin DIRECTORY
my $rbin = '/usr/bin/R';
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

my $annovar_dir = 'inc/annovar';
my $rdep = 'inc/R';

# Chromosomes that are kept, in the order used for the final concatenation.
my @chromosomes = map { "chr$_" } 1 .. 22, qw[ X Y M ];

my %opts;
GetOptions(\%opts, "varfile=s", "outdir=s", "outfile=s");
for my $required (qw[ varfile outdir outfile ]) {
    # Fail before any work is done instead of tripping a fatal
    # "uninitialized" warning half-way through the pipeline.
    die "Missing required option --$required\n" if !defined $opts{$required};
}
my $varfile = $opts{varfile};
my $outdir  = $opts{outdir};
my $outfile = $opts{outfile};

# Temporary files are named after the real file when $varfile is a symlink.
my $fname = readlink($varfile) || $varfile;
$fname = basename($fname);

# --- Step 1: VCF -> ANNOVAR input format --------------------------------
my $converted = "${outdir}/${fname}_Temp-00";
run_shell(
    shq("${annovar_dir}/convert2annovar.pl")
    . ' -format vcf4 ' . shq($varfile)
    . ' -includeinfo > ' . shq($converted) . ' 2> /dev/null'
);

# --- Step 2: split by chromosome ----------------------------------------
my %fh;     # chromosome name => write handle of its Temp-00 file
my %chr;    # chromosome name => number of records written for it
for my $chrom (@chromosomes) {
    my $path = "${outdir}/${fname}_${chrom}.Temp-00";
    open($fh{$chrom}, '>', $path) or die "Cannot write $path: $!\n";
}

open(my $conv_in, '<', $converted) or die "Cannot read $converted: $!\n";
while (my $line = <$conv_in>) {
    # Columns 1, 4 and 5 of the converted record; lines that do not have
    # at least five columns are skipped.
    my ($chrom, $ref, $alt) = $line =~ /^(\S+)\s+(?:\S+\s+){2}(\S+)\s+(\S+)/
        or next;
    next if !exists $fh{$chrom};

    if (min(length($ref), length($alt)) != 1) {
        # Both alleles span more than one character: let explode_varcall
        # break the call up and write one line per returned pair.
        chomp $line;
        my @fields   = split /\s+/, $line;
        my @exploded = explode_varcall(@fields[1, 3 .. 4]);
        # NOTE(review): column 6 ($fields[5]) is not copied to the exploded
        # lines, so they carry one column less than the untouched lines --
        # confirm this matches what explode_varcall returns.
        for (my $i = 0; $i < $#{ $exploded[0] }; $i += 2) {
            print { $fh{$chrom} } join("\t",
                $fields[0],
                @{ $exploded[0] }[$i .. $i + 1],
                @{ $exploded[1] }[$i .. $i + 1],
                @fields[6 .. $#fields]), "\n";
            # Count exploded lines too, otherwise a chromosome holding only
            # such calls would be dropped from the output.
            $chr{$chrom}++;
        }
        next;
    }

    print { $fh{$chrom} } $line;
    $chr{$chrom}++;
}
close $conv_in;

# --- Step 3: sort and merge adjacent records, one chromosome at a time --
for my $chrom (@chromosomes) {
    my $base = "${outdir}/${fname}_${chrom}";
    close($fh{$chrom}) or die "Cannot close $base.Temp-00: $!\n";
    next if !exists $chr{$chrom};

    run_shell('sort -k2,2n -k3,3n ' . shq("$base.Temp-00") . ' > ' . shq("$base.Temp-01"));

    open(my $sorted, '<', "$base.Temp-01") or die "Cannot read $base.Temp-01: $!\n";
    open(my $out,    '>', "$base.Temp-02") or die "Cannot write $base.Temp-02: $!\n";

    my @previous;        # record being accumulated, not yet written
    my @last_printed;    # record written while reading the file's last line
    while (my $line = <$sorted>) {
        my @current = parse_record($line);

        if (!@previous) {
            @previous = @current;
            next;
        }
        if (is_mergeable(\@previous, \@current)) {
            merge_into(\@previous, \@current);
            next;
        }

        print {$out} format_record(@previous);
        @last_printed = @previous if eof($sorted);
        @previous = @current;
    }

    if (@previous) {
        # The pending record is written unless the record written just
        # before it has the same columns 2 and 3 (presumably to suppress a
        # repeated trailing position -- confirm). The comparison is scoped
        # to this file so nothing leaks in from another chromosome.
        my $is_repeat = @last_printed
            && join('_', @last_printed[1 .. 2]) eq join('_', @previous[1 .. 2]);
        print {$out} format_record(@previous) if !$is_repeat;
    }

    close $sorted;
    close($out) or die "Cannot close $base.Temp-02: $!\n";
}

# --- Step 4: concatenate in karyotype order and run the R script --------
# Done in Perl rather than with `cat`: with no input files `cat > file`
# would block reading standard input.
my $merged_path = "${outdir}/${fname}.Temp.2R";
open(my $merged, '>', $merged_path) or die "Cannot write $merged_path: $!\n";
for my $chrom (grep { exists $chr{$_} } @chromosomes) {
    my $part = "${outdir}/${fname}_${chrom}.Temp-02";
    open(my $part_in, '<', $part) or die "Cannot read $part: $!\n";
    while (my $part_line = <$part_in>) {
        print {$merged} $part_line;
    }
    close $part_in;
}
close($merged) or die "Cannot close $merged_path: $!\n";

# R's standard output is discarded, as it never reached this script's
# standard output before either.
run_shell(
    shq($rbin) . ' --vanilla --slave --args ' . shq($merged_path)
    . ' < ' . shq("${rdep}/samvcf_data_parser.R") . ' > /dev/null'
);

# --- Step 5: clean up and publish ---------------------------------------
# Same set of files as the shell pattern "<fname>*Temp*" inside $outdir.
opendir(my $dir, $outdir) or die "Cannot list $outdir: $!\n";
my @stale = grep {
    index($_, $fname) == 0 && substr($_, length $fname) =~ /Temp/
} readdir $dir;
closedir $dir;
unlink map { "${outdir}/$_" } @stale;

unlink $outfile;    # a missing output file is not an error
symlink("${outdir}/${fname}.var", $outfile)
    or die "Cannot link ${outdir}/${fname}.var to $outfile: $!\n";

# Quote a string so the shell passes it through as one literal word.
sub shq {
    my ($text) = @_;
    (my $quoted = $text) =~ s/'/'\\''/g;
    return "'$quoted'";
}

# Run a shell command line and die when it does not exit with status 0.
sub run_shell {
    my ($command) = @_;
    system($command) == 0
        or die "Command failed (wait status $?): $command\n";
    return;
}

# Split one sorted line into the 12-element record used for merging:
# columns 1-7, column 16, then the four comma-separated DP4 values taken
# from column 17 (assumed to be the VCF INFO field -- TODO confirm against
# the convert2annovar version in inc/annovar).
sub parse_record {
    my ($line) = @_;
    my ($head, $tail) = $line =~ /^((?:\S+\s+){7})(?:\S+\s+){8}(\S+\s+\S+)/
        or die "Unexpected record layout: $line";
    my @fields = split /\s+/, $head . $tail;
    my $info   = pop @fields;
    # Anchored on the tag name so the value is complete even when DP4 is
    # the first or the last tag of the field.
    my ($dp4) = $info =~ /(?:^|;)DP4=([^;]+)/
        or die "No DP4 tag found in record: $line";
    my @counts = split /,/, $dp4;
    die "DP4 tag holds fewer than four values in record: $line" if @counts < 4;
    return (@fields, @counts);
}

# True when $cur directly follows $prev: same first column, third column
# exactly one greater, and no '-' in columns 4-5 of either record.
sub is_mergeable {
    my ($prev, $cur) = @_;
    return $prev->[0] eq $cur->[0]
        && $cur->[2] == $prev->[2] + 1
        && join('', @{$prev}[3 .. 4]) !~ /-/
        && join('', @{$cur}[3 .. 4]) !~ /-/;
}

# Fold $cur into $prev in place: extend column 3, concatenate columns 4-5,
# flag disagreeing columns 6 and 8, and average the numeric columns.
sub merge_into {
    my ($prev, $cur) = @_;
    $prev->[2] = $cur->[2];
    $prev->[$_] .= $cur->[$_] for 3 .. 4;
    $prev->[5] = 'unk'  if $prev->[5] ne $cur->[5];
    $prev->[7] = 'SKIP' if $prev->[7] ne $cur->[7];
    # NOTE(review): averaging pairwise weights later records more heavily
    # than an arithmetic mean would when more than two records are merged.
    for my $col (6, 8 .. 11) {
        $prev->[$col] += $cur->[$col];
        $prev->[$col] /= 2;
    }
    return;
}

# Render a record as one output line: '.' in column 8 becomes 'NONE', the
# numeric columns are rounded to integers, and column 8 is moved last.
sub format_record {
    my @record = @_;
    $record[7] = 'NONE' if $record[7] eq '.';
    $record[$_] = sprintf("%.0f", $record[$_]) for 6, 8 .. 11;
    return join("\t", @record[0 .. 6, 8 .. 11, 7]) . "\n";
}