0
|
1 #!/usr/bin/perl
|
|
2
|
|
3 use strict;
|
|
4 use warnings FATAL => qw[ numeric uninitialized ];
|
|
5 use List::Util qw[ sum min max ];
|
|
6 use List::MoreUtils qw[ first_index ];
|
|
7 use File::Basename;
|
|
8 use Getopt::Long;
|
|
9
|
|
10 my($varfile, $buildver, $dbdir, $release, $outdir, $outfile, $max, $i, $k);
|
|
11 my(@buffer, @legend, @header, @k, @score, @tools, @Temp, %buffer, %AAS, %dbscore, %dbtools, %opts);
|
|
12
|
|
13 GetOptions(\%opts, "varfile=s", "buildver=s", "dbdir=s", "release=s", "outdir=s", "outfile=s");
|
|
14 $varfile = $opts{varfile};
|
|
15 $buildver = $opts{buildver};
|
|
16 $dbdir = $opts{dbdir};
|
|
17 $release = $opts{release};
|
|
18 $outdir = $opts{outdir};
|
|
19 $outfile = $opts{outfile};
|
|
20
|
|
21 my $fname = readlink($varfile) || $varfile;
|
|
22 my $dbfile="${dbdir}/${buildver}_dbnsfp_${release}.txt";
|
|
23 $fname = basename($fname);
|
|
24
|
|
25 open IN, "<$varfile" or die $!;
|
|
26 open OUT, ">${outdir}/${fname}.Temp" or die $!;
|
|
27 while(<IN>){
|
|
28 push @legend, $1 and next if /^#(.+=.+)/;
|
|
29 next if $_!~/\b(?:s(?:g|l))|ns\b/;
|
|
30 next if /\s+-\s+/;
|
|
31 /^(?:chr)*(\S+)\s+(\S+)/;
|
|
32 @{$buffer{($k=join('_', $1, $2))}->{dbnsfp}}=();
|
|
33 push @k, $k;
|
|
34 print OUT $_;
|
|
35 }
|
|
36 close IN;
|
|
37 close OUT;
|
|
38
|
|
39 $i=first_index{ /^annot/ } @legend;
|
|
40 @_=$legend[$i]=~/((?:s(?:g|l)|ns):[^;|\s]+)/g;
|
|
41 $legend[$i]=join(' = ', 'annot', join('; ', @_));
|
|
42 push @legend, (
|
|
43 'AAS = Amino Acid Substitution(s)',
|
|
44 "FIS = Functional Impact Score(s) from dbnsfp ${release} release",
|
|
45 "OCC = number of tools from which FIS was/were calculated",
|
|
46 "FIS.max = highest score among FIS",
|
|
47 "OCC.max = number of tools from which FIS.max was calculated",
|
|
48 "PRED = qualitative ternary classifier ie. [L]ow; [M]edium; [H]igh"
|
|
49 );
|
|
50 foreach (@legend){
|
|
51 /^(\S+)/;
|
|
52 push @header, $1;
|
|
53 }
|
|
54
|
|
55 open IN, "<$dbfile" or die $!;
|
|
56 while(<IN>){
|
|
57 next if /^#/;
|
|
58 /^(\S+)\s+(\S+)(?:\s+\S+){2}\s+(.+)/;
|
|
59 next if !exists $buffer{($k=join('_', $1, $2))};
|
|
60 push @{$buffer{$k}->{dbnsfp}}, join(':', split /\t/, $3);
|
|
61 }
|
|
62 close IN;
|
|
63 open IN, "<${outdir}/${fname}.Temp" or die $!;
|
|
64 open OUT, ">${outdir}/${fname}.dbnsfp" or die $!;
|
|
65 print OUT "#", $_, "\n" foreach @legend;
|
|
66 print OUT "#", join("\t", @header), "\n";
|
|
67 foreach $k (@k){
|
|
68 $i=0;
|
|
69 $_=readline(IN);
|
|
70 chomp;
|
|
71 @buffer=split /\s+/, $_;
|
|
72 %{$_}=() foreach (\%AAS, \%dbscore, \%dbtools);
|
|
73 foreach (split(/[;\|]/, $buffer[-1])){
|
|
74 $AAS{$1.$2}++ if /^p\.(\w{1})\d+(\w{1})$/;
|
|
75 }
|
|
76 if($#{$buffer{$k}->{dbnsfp}}<0){
|
|
77 unshift @buffer, (%AAS)?(join(':', keys %AAS)):('na'), (join(':', ('na') x max(scalar(keys %AAS), 1))) x 2;
|
|
78 }elsif(%AAS){
|
|
79 foreach (@{$buffer{$k}->{dbnsfp}}){
|
|
80 @Temp=split /:/, $_;
|
|
81 $k=shift @Temp;
|
|
82 @{$_}=split(/;/, pop @Temp) foreach (\@tools, \@score);
|
|
83 foreach (split /;/, shift @Temp){
|
|
84 $dbscore{$k.$_}=shift @score;
|
|
85 $dbtools{$k.$_}=shift @tools;
|
|
86 }
|
|
87 }
|
|
88 foreach (keys %AAS){
|
|
89 push @score, $dbscore{$_} || 'na';
|
|
90 push @tools, $dbtools{$_} || 'na';
|
|
91 }
|
|
92 unshift @buffer, join(':', keys %AAS), join(':', @score), join(':', @tools);
|
|
93 }
|
|
94 push @buffer, shift @buffer for 1..3;
|
|
95 @{$_}=grep{ !/na/ } split(/:/, $buffer[--$i]) foreach (\@tools, \@score);
|
|
96 $max=max(@score) || 'na';
|
|
97 push @buffer, (($max ne 'na')?($score[($i=first_index{ $max } @score)], $tools[$i], ($max<.3)?'L':($max<.7)?'M':'H'):(('na')x3));
|
|
98 print OUT join("\t", @buffer), "\n";
|
|
99 }
|
|
100 close IN;
|
|
101 close OUT;
|
|
102 system "rm ${outdir}/${fname}*Temp $outfile; ln -s ${outdir}/${fname}.dbnsfp $outfile" and die $!; |