annotate tools/mytools/splicesitescore/score5.pl @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 use strict;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 my $inputfile = $ARGV[0];
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 my $usemaxent = 1;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 my $modelpath = "/Users/xuebing/galaxy-dist/tools/mytools/splicesitescore/";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 my %me2x5 = &makescorematrix($modelpath.'me2x5');
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 my %seq = &makesequencematrix($modelpath.'splicemodels/splice5sequences');
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 my %bgd;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 $bgd{'A'} = 0.27;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 $bgd{'C'} = 0.23;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 $bgd{'G'} = 0.23;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 $bgd{'T'} = 0.27;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 open (FILE,"<$inputfile") || die "can't open!\n";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 while(<FILE>) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 chomp;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 if (/^\s*$/) { #discard blank lines;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 next;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 elsif (/^>/) { #discard comment lines;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 print $_."\t";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 next;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 elsif (/[NQWERYUIOPLKJHFDSZXVBM]/) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 next;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 else {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 $_ =~ s/\cM//g; #gets rid of carriage return
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 my $str = $_;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 print $str."\t";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 $str = uc($str);
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 if ($usemaxent) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 print sprintf("%.2f",&log2(&scoreconsensus($str)*$me2x5{$seq{&getrest($str)}}))."\n";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 sub makesequencematrix{
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 my $file = shift;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 my %matrix;my $n=0;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 open(SCOREF, $file) || die "Can't open $file!\n";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 while(<SCOREF>) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 chomp;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 $_=~ s/\s//;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 $matrix{$_} = $n;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 $n++;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 close(SCOREF);
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 return %matrix;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 sub makescorematrix{
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 my $file = shift;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 my %matrix;my $n=0;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 open(SCOREF, $file) || die "Can't open $file!\n";
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 while(<SCOREF>) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 chomp;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 $_=~ s/\s//;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 $matrix{$n} = $_;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 $n++;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 close(SCOREF);
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 return %matrix;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 sub getrest{
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 my $seq = shift;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 my @seqa = split(//,uc($seq));
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 return $seqa[0].$seqa[1].$seqa[2].$seqa[5].$seqa[6].$seqa[7].$seqa[8];
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 sub scoreconsensus{
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 my $seq = shift;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 my @seqa = split(//,uc($seq));
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 my %bgd;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 $bgd{'A'} = 0.27;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 $bgd{'C'} = 0.23;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 $bgd{'G'} = 0.23;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 $bgd{'T'} = 0.27;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 my %cons1;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 $cons1{'A'} = 0.004;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 $cons1{'C'} = 0.0032;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 $cons1{'G'} = 0.9896;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 $cons1{'T'} = 0.0032;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 my %cons2;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 $cons2{'A'} = 0.0034;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 $cons2{'C'} = 0.0039;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 $cons2{'G'} = 0.0042;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 $cons2{'T'} = 0.9884;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 my $addscore = $cons1{$seqa[3]}*$cons2{$seqa[4]}/($bgd{$seqa[3]}*$bgd{$seqa[4]});
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 return $addscore;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 sub log2{
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 my ($val) = @_;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 return log($val)/log(2);
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 }