15
|
1 use strict;
|
|
2
|
|
3
|
|
4 my $inputfile = $ARGV[0];
|
|
5 my $usemaxent = 1;
|
|
6
|
|
7 my $modelpath = "/Users/xuebing/galaxy-dist/tools/mytools/splicesitescore/";
|
|
8 my %me2x5 = &makescorematrix($modelpath.'me2x5');
|
|
9 my %seq = &makesequencematrix($modelpath.'splicemodels/splice5sequences');
|
|
10
|
|
11 my %bgd;
|
|
12 $bgd{'A'} = 0.27;
|
|
13 $bgd{'C'} = 0.23;
|
|
14 $bgd{'G'} = 0.23;
|
|
15 $bgd{'T'} = 0.27;
|
|
16
|
|
17
|
|
18
|
|
19 open (FILE,"<$inputfile") || die "can't open!\n";
|
|
20
|
|
21 while(<FILE>) {
|
|
22 chomp;
|
|
23 if (/^\s*$/) { #discard blank lines;
|
|
24 next;
|
|
25 }
|
|
26 elsif (/^>/) { #discard comment lines;
|
|
27 print $_."\t";
|
|
28 next;
|
|
29 }
|
|
30 elsif (/[NQWERYUIOPLKJHFDSZXVBM]/) {
|
|
31 next;
|
|
32 }
|
|
33 else {
|
|
34 $_ =~ s/\cM//g; #gets rid of carriage return
|
|
35 my $str = $_;
|
|
36 print $str."\t";
|
|
37 $str = uc($str);
|
|
38 if ($usemaxent) {
|
|
39 print sprintf("%.2f",&log2(&scoreconsensus($str)*$me2x5{$seq{&getrest($str)}}))."\n";
|
|
40 }
|
|
41 }
|
|
42 }
|
|
43
|
|
44
|
|
45 sub makesequencematrix{
|
|
46 my $file = shift;
|
|
47 my %matrix;my $n=0;
|
|
48 open(SCOREF, $file) || die "Can't open $file!\n";
|
|
49 while(<SCOREF>) {
|
|
50 chomp;
|
|
51 $_=~ s/\s//;
|
|
52 $matrix{$_} = $n;
|
|
53 $n++;
|
|
54 }
|
|
55 close(SCOREF);
|
|
56 return %matrix;
|
|
57 }
|
|
58 sub makescorematrix{
|
|
59 my $file = shift;
|
|
60 my %matrix;my $n=0;
|
|
61 open(SCOREF, $file) || die "Can't open $file!\n";
|
|
62 while(<SCOREF>) {
|
|
63 chomp;
|
|
64 $_=~ s/\s//;
|
|
65 $matrix{$n} = $_;
|
|
66 $n++;
|
|
67 }
|
|
68 close(SCOREF);
|
|
69 return %matrix;
|
|
70 }
|
|
71
|
|
72 sub getrest{
|
|
73 my $seq = shift;
|
|
74 my @seqa = split(//,uc($seq));
|
|
75 return $seqa[0].$seqa[1].$seqa[2].$seqa[5].$seqa[6].$seqa[7].$seqa[8];
|
|
76 }
|
|
77 sub scoreconsensus{
|
|
78 my $seq = shift;
|
|
79 my @seqa = split(//,uc($seq));
|
|
80 my %bgd;
|
|
81 $bgd{'A'} = 0.27;
|
|
82 $bgd{'C'} = 0.23;
|
|
83 $bgd{'G'} = 0.23;
|
|
84 $bgd{'T'} = 0.27;
|
|
85 my %cons1;
|
|
86 $cons1{'A'} = 0.004;
|
|
87 $cons1{'C'} = 0.0032;
|
|
88 $cons1{'G'} = 0.9896;
|
|
89 $cons1{'T'} = 0.0032;
|
|
90 my %cons2;
|
|
91 $cons2{'A'} = 0.0034;
|
|
92 $cons2{'C'} = 0.0039;
|
|
93 $cons2{'G'} = 0.0042;
|
|
94 $cons2{'T'} = 0.9884;
|
|
95 my $addscore = $cons1{$seqa[3]}*$cons2{$seqa[4]}/($bgd{$seqa[3]}*$bgd{$seqa[4]});
|
|
96 return $addscore;
|
|
97 }
|
|
98
|
|
99 sub log2{
|
|
100 my ($val) = @_;
|
|
101 return log($val)/log(2);
|
|
102 }
|