diff gfapts/gfap_r1.0_samvcf_data_parser.pl @ 0:f753b30013e6 draft

Uploaded
author rdaveau
date Fri, 29 Jun 2012 10:20:55 -0400
parents
children 028f435b6cfb
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gfapts/gfap_r1.0_samvcf_data_parser.pl	Fri Jun 29 10:20:55 2012 -0400
@@ -0,0 +1,104 @@
+#!/usr/bin/perl
+
+use strict;
+use lib 'inc/perlmod';
+use ngsutil qw[ :DEFAULT &explode_varcall ];
+use warnings FATAL => qw[ numeric uninitialized ];
+use List::Util qw[ sum min max ];
+use File::Basename;
+use Getopt::Long;
+
+#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+#	PATH TO YOUR R EXECUTABLE
+#	(invoked directly as the command that runs the R script)
+my $rbin = '/usr/bin/R';
+#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+# Directories, relative to the working directory, holding convert2annovar.pl
+# and the R script run at the end of this program.
+my $annovar_dir = 'inc/annovar';
+my $rdep = 'inc/R';
+
+# Script-wide working variables shared by the processing stages below.
+my($varfile, $outdir, $outfile, $i, @DP4, @buffer, @Temp, @previous, @fnames, %opts, %chr);
+
+# Command line: --varfile <input VCF> --outdir <work directory> --outfile <result path>
+GetOptions(\%opts, "varfile=s", "outdir=s", "outfile=s");
+
+# All three options are required. Check them up front so that a missing one
+# is reported clearly before any work is done, instead of surfacing later as
+# a fatal "uninitialized value" warning (for --outfile, only at the very end).
+foreach my $required (qw[ varfile outdir outfile ]){
+		die "Usage: $0 --varfile <file> --outdir <dir> --outfile <file> (missing --$required)\n"
+			if !defined $opts{$required};
+	}
+$varfile = $opts{varfile};
+$outdir  = $opts{outdir};
+$outfile = $opts{outfile};
+
+# Name temporary files after the real file when the input is a symbolic link.
+my $fname = readlink($varfile) || $varfile;
+$fname = basename($fname);
+
+# One filehandle slot per supported chromosome (chr1..chr22, chrX, chrY, chrM).
+# The slots start out undefined; open() fills each one with a fresh handle.
+my %fh = map { ("chr$_" => undef) } 1..22, qw[ X Y M ];
+
+# Convert the VCF into ANNOVAR's tabular layout, keeping the original VCF
+# columns (-includeinfo). The exit status is tested explicitly: stdout is
+# redirected to a file, so the string returned by backticks would be empty
+# (false) whether or not the conversion succeeded.
+system("${annovar_dir}/convert2annovar.pl -format vcf4 $varfile -includeinfo > ${outdir}/${fname}_Temp-00 2> /dev/null") == 0
+	or die "convert2annovar.pl failed on $varfile (wait status $?)\n";
+
+# Open one output file per supported chromosome.
+foreach my $name (keys %fh){
+		open($fh{$name}, '>', "${outdir}/${fname}_${name}.Temp-00")
+			or die "Cannot write ${outdir}/${fname}_${name}.Temp-00: $!\n";
+	}
+
+# Distribute the converted rows over the per-chromosome files.
+open(my $in, '<', "${outdir}/${fname}_Temp-00")
+	or die "Cannot read ${outdir}/${fname}_Temp-00: $!\n";
+while(my $line = <$in>){
+		# Captures: column 1 (chromosome), column 4 and column 5 (the two allele
+		# strings). Rows that do not have this shape are skipped rather than
+		# processed with captures left over from an earlier row.
+		next if $line !~ /^(\S+)\s+(?:\S+\s+){2}(\S+)\s+(\S+)/;
+		my($chrom, $ref, $alt) = ($1, $2, $3);
+		next if !exists $fh{$chrom};
+		if(min(length($ref), length($alt)) != 1){
+				# Neither allele string is a single character: hand columns 2, 4
+				# and 5 to explode_varcall() and write one row per pair of values
+				# it returns (element 0 supplies columns 2-3, element 1 columns 4-5).
+				# NOTE(review): the rebuilt row resumes at $buffer[6], so column 6
+				# of the input row is not copied, and this branch does not count
+				# the row in %chr - confirm both are intended.
+				chomp $line;
+				@buffer = split /\s+/, $line;
+				@Temp = explode_varcall(@buffer[1,3..4]);
+				for($i=0; $i<$#{$Temp[0]}; $i+=2){
+						print{ $fh{$buffer[0]} } join("\t", $buffer[0], @{$Temp[0]}[$i..$i+1], @{$Temp[1]}[$i..$i+1], @buffer[6..$#buffer]), "\n";
+					}
+				next;
+			}
+		print{ $fh{$chrom} } $line;
+		# Chromosomes recorded here are the ones processed further down.
+		$chr{$chrom}++;
+	}
+close $in;
+# Split one sorted row into the values used by the merge step.
+# Returns columns 1-7 of the row, then column 16, then the comma-separated
+# values of the DP4 tag found in column 17 (indices 8..11 when DP4 holds four
+# values). Dies when the row does not have that layout or carries no DP4 tag.
+sub _samvcf_parse_row {
+		my($row) = @_;
+		$row =~ /^((?:\S+\s+){7})(?:\S+\s+){8}(\S+\s+\S+)/
+			or die "Unexpected row layout: $row";
+		my @fields = split /\s+/, $1.$2;
+		my $info = pop @fields;
+		# Accept DP4 anywhere in the tag list, including first or last position.
+		$info =~ /(?:^|;)DP4=([^;]+)/
+			or die "No DP4 tag found in row: $row";
+		return (@fields, split(/,/, $1));
+	}
+
+# Render a parsed (and possibly merged) record as one tab-separated output
+# line: a '.' in field 7 becomes 'NONE', fields 6 and 8..11 are rounded to
+# integers, and field 7 is moved to the last column.
+sub _samvcf_format_row {
+		my @record = @_;
+		$record[7] = 'NONE' if $record[7] eq '.';
+		$record[$_] = sprintf("%.0f", $record[$_]) for (6,8..11);
+		return join("\t", @record[0..6,8..11,7]) . "\n";
+	}
+
+# For every chromosome that received rows: sort them numerically on columns 2
+# and 3, then fuse runs of rows whose column 3 values are consecutive into a
+# single record, writing the result to <chromosome>.Temp-02.
+foreach my $name (keys %fh){
+		# Buffered write errors only show up when the handle is closed.
+		close($fh{$name}) or die "Cannot close ${outdir}/${fname}_${name}.Temp-00: $!\n";
+		next if !exists $chr{$name};
+		my $base = "${outdir}/${fname}_${name}";
+
+		# stdout is redirected, so backticks would always return an empty
+		# string; test the exit status instead.
+		system("sort -k2,2n -k3,3n ${base}.Temp-00 > ${base}.Temp-01") == 0
+			or die "sort failed on ${base}.Temp-00 (wait status $?)\n";
+
+		open(my $in, '<', "${base}.Temp-01") or die "Cannot read ${base}.Temp-01: $!\n";
+		open(my $out, '>', "${base}.Temp-02") or die "Cannot write ${base}.Temp-02: $!\n";
+
+		my $first = <$in>;
+		die "${base}.Temp-01 is unexpectedly empty\n" if !defined $first;
+
+		# @pending is the record being accumulated; it is written out once a
+		# row arrives that cannot be merged into it.
+		my @pending = _samvcf_parse_row($first);
+		# Record written while reading the file's last row, if any. It is
+		# kept per chromosome so the final check below never looks at values
+		# left over from another chromosome or from an earlier stage.
+		my @last_flushed;
+		ROW: while(my $row = <$in>){
+				my @current = _samvcf_parse_row($row);
+				# Merge when both rows share column 1, their column 3 values are
+				# consecutive and neither has a '-' in columns 4-5 (presumably
+				# ANNOVAR's insertion/deletion notation - confirm).
+				if(($pending[0] eq $current[0]) && ($current[2]==$pending[2]+1) && (join('', @pending[3..4]) !~ /-/) && (join('', @current[3..4]) !~ /-/)){
+						$pending[2] = $current[2];
+						$pending[$_] .= $current[$_] for 3..4;
+						# Disagreeing annotations are replaced by a marker value.
+						$pending[5] = 'unk' if $pending[5] ne $current[5];
+						$pending[7] = 'SKIP' if $pending[7] ne $current[7];
+						# Numeric fields become the mean of the accumulated value
+						# and the new row's value.
+						$pending[$_] = ($pending[$_] + $current[$_]) / 2 for (6,8..11);
+						next ROW;
+					}
+				print {$out} _samvcf_format_row(@pending);
+				@last_flushed = @pending if eof($in);
+				@pending = @current;
+			}
+		# Write the record still pending. As before, it is left out when the
+		# record written while reading the last row has the same columns 2-3.
+		print {$out} _samvcf_format_row(@pending)
+			if !@last_flushed || join('_', @last_flushed[1..2]) ne join('_', @pending[1..2]);
+		close $in;
+		close($out) or die "Cannot close ${base}.Temp-02: $!\n";
+	}
+# Collect the per-chromosome results in the order chr1..chr22, chrX, chrY, chrM.
+foreach my $id (1..22, 'X', 'Y', 'M'){
+		push @fnames, "${outdir}/${fname}_chr${id}.Temp-02" if exists $chr{"chr$id"};
+	}
+
+# Concatenate them into the single table read by the R script. stdin is tied
+# to /dev/null so that an empty file list yields an empty table instead of
+# leaving cat waiting for input.
+system(join(' ', 'cat', @fnames, '<', '/dev/null', '>', "${outdir}/${fname}.Temp.2R")) == 0
+	or die "Cannot build ${outdir}/${fname}.Temp.2R (wait status $?)\n";
+
+# Run the R script on that table. Success is judged by R's exit status; its
+# standard output is discarded, as it was when the command ran in backticks.
+system("${rbin} --vanilla --slave --args ${outdir}/${fname}.Temp.2R < ${rdep}/samvcf_data_parser.R > /dev/null") == 0
+	or die "${rdep}/samvcf_data_parser.R failed (wait status $?)\n";
+
+# Remove the intermediate files and any previous $outfile, then make $outfile
+# a symbolic link to ${fname}.var (presumably written by the R script - confirm).
+unlink(glob("${outdir}/${fname}*Temp*"), $outfile);
+symlink("${outdir}/${fname}.var", $outfile)
+	or die "Cannot link $outfile to ${outdir}/${fname}.var: $!\n";
\ No newline at end of file