Mercurial > repos > rdaveau > gfap
comparison gfapts/gfap_r1.0_cdsvar_functional_annotater.pl @ 0:f753b30013e6 draft
Uploaded
author | rdaveau |
---|---|
date | Fri, 29 Jun 2012 10:20:55 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f753b30013e6 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 use warnings FATAL => qw[ numeric uninitialized ]; | |
5 use List::Util qw[ sum min max ]; | |
6 use List::MoreUtils qw[ first_index ]; | |
7 use File::Basename; | |
8 use Getopt::Long; | |
9 | |
10 my($varfile, $buildver, $dbdir, $release, $outdir, $outfile, $max, $i, $k); | |
11 my(@buffer, @legend, @header, @k, @score, @tools, @Temp, %buffer, %AAS, %dbscore, %dbtools, %opts); | |
12 | |
13 GetOptions(\%opts, "varfile=s", "buildver=s", "dbdir=s", "release=s", "outdir=s", "outfile=s"); | |
14 $varfile = $opts{varfile}; | |
15 $buildver = $opts{buildver}; | |
16 $dbdir = $opts{dbdir}; | |
17 $release = $opts{release}; | |
18 $outdir = $opts{outdir}; | |
19 $outfile = $opts{outfile}; | |
20 | |
21 my $fname = readlink($varfile) || $varfile; | |
22 my $dbfile="${dbdir}/${buildver}_dbnsfp_${release}.txt"; | |
23 $fname = basename($fname); | |
24 | |
25 open IN, "<$varfile" or die $!; | |
26 open OUT, ">${outdir}/${fname}.Temp" or die $!; | |
27 while(<IN>){ | |
28 push @legend, $1 and next if /^#(.+=.+)/; | |
29 next if $_!~/\b(?:s(?:g|l))|ns\b/; | |
30 next if /\s+-\s+/; | |
31 /^(?:chr)*(\S+)\s+(\S+)/; | |
32 @{$buffer{($k=join('_', $1, $2))}->{dbnsfp}}=(); | |
33 push @k, $k; | |
34 print OUT $_; | |
35 } | |
36 close IN; | |
37 close OUT; | |
38 | |
39 $i=first_index{ /^annot/ } @legend; | |
40 @_=$legend[$i]=~/((?:s(?:g|l)|ns):[^;|\s]+)/g; | |
41 $legend[$i]=join(' = ', 'annot', join('; ', @_)); | |
42 push @legend, ( | |
43 'AAS = Amino Acid Substitution(s)', | |
44 "FIS = Functional Impact Score(s) from dbnsfp ${release} release", | |
45 "OCC = number of tools from which FIS was/were calculated", | |
46 "FIS.max = highest score among FIS", | |
47 "OCC.max = number of tools from which FIS.max was calculated", | |
48 "PRED = qualitative ternary classifier ie. [L]ow; [M]edium; [H]igh" | |
49 ); | |
50 foreach (@legend){ | |
51 /^(\S+)/; | |
52 push @header, $1; | |
53 } | |
54 | |
55 open IN, "<$dbfile" or die $!; | |
56 while(<IN>){ | |
57 next if /^#/; | |
58 /^(\S+)\s+(\S+)(?:\s+\S+){2}\s+(.+)/; | |
59 next if !exists $buffer{($k=join('_', $1, $2))}; | |
60 push @{$buffer{$k}->{dbnsfp}}, join(':', split /\t/, $3); | |
61 } | |
62 close IN; | |
63 open IN, "<${outdir}/${fname}.Temp" or die $!; | |
64 open OUT, ">${outdir}/${fname}.dbnsfp" or die $!; | |
65 print OUT "#", $_, "\n" foreach @legend; | |
66 print OUT "#", join("\t", @header), "\n"; | |
67 foreach $k (@k){ | |
68 $i=0; | |
69 $_=readline(IN); | |
70 chomp; | |
71 @buffer=split /\s+/, $_; | |
72 %{$_}=() foreach (\%AAS, \%dbscore, \%dbtools); | |
73 foreach (split(/[;\|]/, $buffer[-1])){ | |
74 $AAS{$1.$2}++ if /^p\.(\w{1})\d+(\w{1})$/; | |
75 } | |
76 if($#{$buffer{$k}->{dbnsfp}}<0){ | |
77 unshift @buffer, (%AAS)?(join(':', keys %AAS)):('na'), (join(':', ('na') x max(scalar(keys %AAS), 1))) x 2; | |
78 }elsif(%AAS){ | |
79 foreach (@{$buffer{$k}->{dbnsfp}}){ | |
80 @Temp=split /:/, $_; | |
81 $k=shift @Temp; | |
82 @{$_}=split(/;/, pop @Temp) foreach (\@tools, \@score); | |
83 foreach (split /;/, shift @Temp){ | |
84 $dbscore{$k.$_}=shift @score; | |
85 $dbtools{$k.$_}=shift @tools; | |
86 } | |
87 } | |
88 foreach (keys %AAS){ | |
89 push @score, $dbscore{$_} || 'na'; | |
90 push @tools, $dbtools{$_} || 'na'; | |
91 } | |
92 unshift @buffer, join(':', keys %AAS), join(':', @score), join(':', @tools); | |
93 } | |
94 push @buffer, shift @buffer for 1..3; | |
95 @{$_}=grep{ !/na/ } split(/:/, $buffer[--$i]) foreach (\@tools, \@score); | |
96 $max=max(@score) || 'na'; | |
97 push @buffer, (($max ne 'na')?($score[($i=first_index{ $max } @score)], $tools[$i], ($max<.3)?'L':($max<.7)?'M':'H'):(('na')x3)); | |
98 print OUT join("\t", @buffer), "\n"; | |
99 } | |
100 close IN; | |
101 close OUT; | |
102 system "rm ${outdir}/${fname}*Temp $outfile; ln -s ${outdir}/${fname}.dbnsfp $outfile" and die $!; |