annotate gfapts/gfap_r1.0_samvcf_data_parser.pl @ 1:028f435b6cfb draft default tip

Uploaded
author rdaveau
date Fri, 03 Aug 2012 05:50:41 -0400
parents f753b30013e6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
1 #!/usr/bin/perl
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
2
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
3 use strict;
1
028f435b6cfb Uploaded
rdaveau
parents: 0
diff changeset
4 # use lib 'inc/perlmod';
028f435b6cfb Uploaded
rdaveau
parents: 0
diff changeset
5 # use ngsutil qw[ :DEFAULT &explode_varcall ];
0
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
6 use warnings FATAL => qw[ numeric uninitialized ];
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
7 use List::Util qw[ sum min max ];
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
8 use File::Basename;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
9 use Getopt::Long;
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
10
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
11 #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
12 # PATH TO YOUR R-bin DIRECTORY
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
13 my $rbin = '/usr/bin/R';
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
14 #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
15
1
028f435b6cfb Uploaded
rdaveau
parents: 0
diff changeset
16 #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
028f435b6cfb Uploaded
rdaveau
parents: 0
diff changeset
17 # TEMP include ngsutil.pm
028f435b6cfb Uploaded
rdaveau
parents: 0
diff changeset
# explode_varcall($POS, $REF, $ALT)
#
# Breaks one variant call into elementary events and returns two array
# references:
#   - a flat list of (start, end) coordinate pairs, one pair per event;
#   - a flat list of (ref, alt) allele pairs in the same order, where '-'
#     stands for an empty allele.
#
# When the shorter allele is a single base, exactly one event is returned.
# Otherwise the leading base of both alleles is removed, the remainders are
# compared position by position, and every run of mismatches and every run
# of gap columns becomes its own event.
sub explode_varcall {
    my ($POS, $REF, $ALT) = @_;
    my ($START, $END) = ($POS, $POS);
    my @length = map { length } $REF, $ALT;
    my @range  = sort { $a <=> $b } @length;

    # Simple case: the shorter allele is one base long.
    if ($range[0] == 1) {
        if ($range[1] != 1) {
            # Drop the leading base; an allele that becomes empty is '-'.
            for my $allele ($REF, $ALT) {
                $allele = substr($allele, 1);
                $allele =~ s/^$/-/;
            }
            # REF was the long allele: the event covers the removed bases.
            if ($length[0] != 1) {
                $START++;
                $END += $length[0] - 1;
            }
        }
        return ([ $START, $END ], [ $REF, $ALT ]);
    }

    # Complex case.  After removing the leading base, @length and @range
    # hold the index of the last character of each remaining allele.
    $_ -= 2 for @length, @range;
    $START++;
    ($REF, $ALT) = map { substr($_, 1) } $REF, $ALT;
    my $gap = '-' x ($range[1] - $range[0]);

    # Left-aligned comparison: 1 = same base, 0 = different base,
    # '-' = column past the end of the shorter allele.
    my $mask;
    for my $p (0 .. $range[1]) {
        $mask .= $p > $range[0]                             ? '-'
               : substr($REF, $p, 1) ne substr($ALT, $p, 1) ? 0
               :                                              1;
    }
    my $mismatches = () = $mask =~ /0/g;

    if ($length[0] < $length[1]) {
        # REF is the shorter allele: also try it right-aligned and keep
        # that layout only when it has strictly fewer mismatches.
        my $tail_mask;
        for my $p (reverse 0 .. $range[1]) {
            $tail_mask .=
                $p > $range[0] ? '-'
              : substr($REF, $length[0] - $p, 1) ne substr($ALT, $length[1] - $p, 1) ? 0
              :                                                                         1;
        }
        my $tail_mismatches = () = $tail_mask =~ /0/g;
        if ($tail_mismatches < $mismatches) {
            $mask = $tail_mask;
            $REF  = $gap . $REF;
        }
        # Otherwise REF stays unpadded; the "|| '-'" below supplies the
        # gap symbol when the slice falls past its end.
    }
    else {
        $ALT .= $gap;
    }

    # Collect [offset, width] for every run of mismatches, then every run
    # of gap columns.
    my @runs;
    for my $symbol (qw[ 0 \- ]) {
        while ($mask =~ /$symbol+/g) {
            push @runs, [ $-[0], $+[0] - $-[0] ];
        }
    }

    my (@VAR, @POS);
    for my $run (@runs) {
        my ($offset, $width) = @{$run};
        my @piece = map { substr($_, $offset, $width) || '-' } $REF, $ALT;
        s/\-+/\-/ for @piece;

        my $first = $offset + $START;
        my $last  = $offset + $width - 1 + $START;
        # An event whose REF side is a gap gets end == start.
        $last = $first if $piece[0] eq '-';

        push @POS, $first, $last;
        push @VAR, @piece;
    }
    return (\@POS, \@VAR);
}
028f435b6cfb Uploaded
rdaveau
parents: 0
diff changeset
76
028f435b6cfb Uploaded
rdaveau
parents: 0
diff changeset
# varscan($kname, $fpath, $href)
#
# Reads the whitespace-delimited file $fpath and annotates the records of
# %$href with it.
#
#   $kname  key under which the extra columns are stored in each record
#   $fpath  path of the file to read; lines starting with '#' are skipped
#   $href   reference to a hash keyed by "field0:field1:field2" whose values
#           are hash references holding at least 'ref' and 'alt'
#
# For every line whose first three fields form an existing key of %$href and
# whose 4th and 5th fields match the record's 'ref' and 'alt', the remaining
# fields (6th onwards) are joined with ':' and stored in
# $href->{key}{$kname}.  Dies if the file cannot be opened.  Returns the
# result of closing the file.
sub varscan {
    my ($kname, $fpath, $href) = @_;

    # Lexical handle and three-argument open: the global IN handle used by
    # the main script is left alone and the file name cannot inject a mode.
    open(my $in, '<', $fpath)
        or die "Cannot open '$fpath' for reading: $!\n";

    # A lexical line variable keeps the caller's $_ intact.
    while (my $line = <$in>) {
        next if $line =~ /^#/;
        chomp $line;
        my @buffer = split /\s+/, $line;

        # Blank or truncated lines cannot carry a key plus both alleles.
        next if @buffer < 5;

        my $k = join(':', @buffer[0 .. 2]);
        next if !exists $$href{$k};

        # NOTE(review): the file's allele fields are used as regex patterns,
        # so this is an unanchored pattern match rather than string equality
        # (metacharacters in the field are interpreted).  Kept as in the
        # original; confirm whether exact comparison is intended.
        next if $$href{$k}->{ref} !~ $buffer[3];
        next if $$href{$k}->{alt} !~ $buffer[4];

        splice(@buffer, 0, 5);
        $$href{$k}->{$kname} = join(':', @buffer);
    }
    return close $in;
}
028f435b6cfb Uploaded
rdaveau
parents: 0
diff changeset
93 #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
028f435b6cfb Uploaded
rdaveau
parents: 0
diff changeset
94
0
f753b30013e6 Uploaded
rdaveau
parents:
diff changeset
# ---------------------------------------------------------------------------
# Main script.
#
# Steps:
#   1. convert the VCF given by --varfile with convert2annovar.pl;
#   2. split the converted rows into one temporary file per chromosome
#      (chr1..chr22, chrX, chrY, chrM), exploding multi-base calls with
#      explode_varcall();
#   3. sort each chromosome file and merge runs of adjacent non-indel rows;
#   4. concatenate the per-chromosome results, run samvcf_data_parser.R on
#      them, remove the temporary files and link the result to --outfile.
# ---------------------------------------------------------------------------
my $annovar_dir = 'inc/annovar';
my $rdep = 'inc/R';

# Run a command line through the shell and die unless it exits with 0.
# (Testing the text captured by backticks, as was done before, cannot detect
# a failed command because stdout is redirected to a file.)
sub _run_shell {
    my ($cmd) = @_;
    my $status = system($cmd);
    die "Cannot run '$cmd': $!\n" if $status == -1;
    die "Command failed (wait status $status): $cmd\n" if $status != 0;
    return;
}

# Parse one sorted row into the 12-element list used by the merge step:
# fields 0..6 of the row, the 16th field, and the four comma-separated
# values of the DP4 entry found in the 17th field.
sub _parse_variant_row {
    my ($line) = @_;
    die "Unexpected row layout: $line"
        unless $line =~ /^((?:\S+\s+){7})(?:\S+\s+){8}(\S+\s+\S+)/;
    my @row  = split /\s+/, $1 . $2;
    my $info = pop @row;

    # Anchor on the entry boundaries so DP4 is read completely whether it
    # is the first, a middle or the last entry of the field.
    die "No DP4 entry in '$info'\n"
        unless $info =~ /(?:^|;)DP4=([^;]+)/;
    my @dp4 = split /,/, $1;
    die "Expected 4 DP4 values in '$info'\n" unless @dp4 == 4;

    return (@row, @dp4);
}

# Write one (possibly merged) row: '.' in column 7 becomes 'NONE', the
# numeric columns are rounded, and column 7 is moved to the end.
sub _write_variant_row {
    my ($out, $row) = @_;
    $row->[7] = 'NONE' if $row->[7] eq '.';
    $row->[$_] = sprintf("%.0f", $row->[$_]) for (6, 8 .. 11);
    print {$out} join("\t", @{$row}[0 .. 6, 8 .. 11, 7]), "\n";
    return;
}

my %opts;
GetOptions(\%opts, "varfile=s", "outdir=s", "outfile=s");
for my $required (qw[ varfile outdir outfile ]) {
    die "Missing required option --$required\n" unless defined $opts{$required};
}
my ($varfile, $outdir, $outfile) = @opts{qw[ varfile outdir outfile ]};

# Name temporary files after the link target when --varfile is a symlink.
my $fname  = basename(readlink($varfile) || $varfile);
my $prefix = "${outdir}/${fname}";

my @chromosomes = map { "chr$_" } 1 .. 22, 'X', 'Y', 'M';
my (%fh, %chr);

_run_shell("${annovar_dir}/convert2annovar.pl -format vcf4 $varfile -includeinfo > ${prefix}_Temp-00 2> /dev/null");

# --- Step 2: split by chromosome -------------------------------------------
for my $name (@chromosomes) {
    open($fh{$name}, '>', "${prefix}_${name}.Temp-00")
        or die "Cannot write ${prefix}_${name}.Temp-00: $!\n";
}
open(my $converted, '<', "${prefix}_Temp-00")
    or die "Cannot read ${prefix}_Temp-00: $!\n";
while (my $line = <$converted>) {
    # Skip rows that do not have the expected layout instead of reusing the
    # captures of an earlier row.
    next unless $line =~ /^(\S+)\s+(?:\S+\s+){2}(\S+)\s+(\S+)/;
    my ($chrom, $ref, $alt) = ($1, $2, $3);
    next if !exists $fh{$chrom};

    if (min(length($ref), length($alt)) != 1) {
        chomp $line;
        my @field = split /\s+/, $line;
        my ($pos, $var) = explode_varcall(@field[1, 3 .. 4]);
        for (my $i = 0; $i < $#{$pos}; $i += 2) {
            # Keep every trailing column (5 onwards) so exploded rows have
            # the same layout as the rows copied verbatim below; the later
            # parse is positional.
            print { $fh{$chrom} } join("\t", $chrom, @{$pos}[$i, $i + 1], @{$var}[$i, $i + 1], @field[5 .. $#field]), "\n";
            # Count exploded rows too, otherwise a chromosome holding only
            # multi-base calls would never be processed.
            $chr{$chrom}++;
        }
        next;
    }
    print { $fh{$chrom} } $line;
    $chr{$chrom}++;
}
close $converted;

# --- Step 3: sort and merge each chromosome --------------------------------
for my $name (@chromosomes) {
    close($fh{$name}) or die "Cannot close ${prefix}_${name}.Temp-00: $!\n";
    next if !exists $chr{$name};

    _run_shell("sort -k2,2n -k3,3n ${prefix}_${name}.Temp-00 > ${prefix}_${name}.Temp-01");

    open(my $sorted, '<', "${prefix}_${name}.Temp-01")
        or die "Cannot read ${prefix}_${name}.Temp-01: $!\n";
    open(my $merged, '>', "${prefix}_${name}.Temp-02")
        or die "Cannot write ${prefix}_${name}.Temp-02: $!\n";

    my @previous;    # row waiting to be written (possibly a merge of several)
    while (my $line = <$sorted>) {
        my @buffer = _parse_variant_row($line);
        if (!@previous) {
            @previous = @buffer;
            next;
        }

        # Merge when the new row ends exactly one position after the
        # pending one and neither row has '-' in its alleles.
        if (   ($previous[0] eq $buffer[0])
            && ($buffer[2] == $previous[2] + 1)
            && (join('', @previous[3 .. 4]) !~ /-/)
            && (join('', @buffer[3 .. 4]) !~ /-/))
        {
            $previous[2] = $buffer[2];
            $previous[$_] .= $buffer[$_] for 3 .. 4;
            $previous[5] = 'unk'  if $previous[5] ne $buffer[5];
            $previous[7] = 'SKIP' if $previous[7] ne $buffer[7];
            # Numeric columns: average of the pending value and the new one.
            for (6, 8 .. 11) {
                $previous[$_] += $buffer[$_];
                $previous[$_] /= 2;
            }
            next;
        }

        _write_variant_row($merged, \@previous);
        @previous = @buffer;
    }
    # The pending row has never been written when the loop ends, so it is
    # written unconditionally.
    _write_variant_row($merged, \@previous) if @previous;

    close $sorted;
    close($merged) or die "Cannot close ${prefix}_${name}.Temp-02: $!\n";
}

# --- Step 4: concatenate, run R, clean up ----------------------------------
my @fnames = map { "${prefix}_${_}.Temp-02" } grep { exists $chr{$_} } @chromosomes;
# With no input file at all, "cat > file" would wait on stdin.
@fnames = ('/dev/null') if !@fnames;
_run_shell(join(' ', 'cat', @fnames, '>', "${prefix}.Temp.2R"));
# R's stdout was captured and discarded before; keep discarding it.
_run_shell("${rbin} --vanilla --slave --args ${prefix}.Temp.2R < ${rdep}/samvcf_data_parser.R > /dev/null");
_run_shell("rm ${outdir}/${fname}*Temp* $outfile; ln -s ${outdir}/${fname}.var $outfile");