comparison tools/mytools/splicesitescore/score5.pl @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 use strict;
2
3
4 my $inputfile = $ARGV[0];
5 my $usemaxent = 1;
6
7 my $modelpath = "/Users/xuebing/galaxy-dist/tools/mytools/splicesitescore/";
8 my %me2x5 = &makescorematrix($modelpath.'me2x5');
9 my %seq = &makesequencematrix($modelpath.'splicemodels/splice5sequences');
10
11 my %bgd;
12 $bgd{'A'} = 0.27;
13 $bgd{'C'} = 0.23;
14 $bgd{'G'} = 0.23;
15 $bgd{'T'} = 0.27;
16
17
18
19 open (FILE,"<$inputfile") || die "can't open!\n";
20
21 while(<FILE>) {
22 chomp;
23 if (/^\s*$/) { #discard blank lines;
24 next;
25 }
26 elsif (/^>/) { #discard comment lines;
27 print $_."\t";
28 next;
29 }
30 elsif (/[NQWERYUIOPLKJHFDSZXVBM]/) {
31 next;
32 }
33 else {
34 $_ =~ s/\cM//g; #gets rid of carriage return
35 my $str = $_;
36 print $str."\t";
37 $str = uc($str);
38 if ($usemaxent) {
39 print sprintf("%.2f",&log2(&scoreconsensus($str)*$me2x5{$seq{&getrest($str)}}))."\n";
40 }
41 }
42 }
43
44
45 sub makesequencematrix{
46 my $file = shift;
47 my %matrix;my $n=0;
48 open(SCOREF, $file) || die "Can't open $file!\n";
49 while(<SCOREF>) {
50 chomp;
51 $_=~ s/\s//;
52 $matrix{$_} = $n;
53 $n++;
54 }
55 close(SCOREF);
56 return %matrix;
57 }
58 sub makescorematrix{
59 my $file = shift;
60 my %matrix;my $n=0;
61 open(SCOREF, $file) || die "Can't open $file!\n";
62 while(<SCOREF>) {
63 chomp;
64 $_=~ s/\s//;
65 $matrix{$n} = $_;
66 $n++;
67 }
68 close(SCOREF);
69 return %matrix;
70 }
71
72 sub getrest{
73 my $seq = shift;
74 my @seqa = split(//,uc($seq));
75 return $seqa[0].$seqa[1].$seqa[2].$seqa[5].$seqa[6].$seqa[7].$seqa[8];
76 }
77 sub scoreconsensus{
78 my $seq = shift;
79 my @seqa = split(//,uc($seq));
80 my %bgd;
81 $bgd{'A'} = 0.27;
82 $bgd{'C'} = 0.23;
83 $bgd{'G'} = 0.23;
84 $bgd{'T'} = 0.27;
85 my %cons1;
86 $cons1{'A'} = 0.004;
87 $cons1{'C'} = 0.0032;
88 $cons1{'G'} = 0.9896;
89 $cons1{'T'} = 0.0032;
90 my %cons2;
91 $cons2{'A'} = 0.0034;
92 $cons2{'C'} = 0.0039;
93 $cons2{'G'} = 0.0042;
94 $cons2{'T'} = 0.9884;
95 my $addscore = $cons1{$seqa[3]}*$cons2{$seqa[4]}/($bgd{$seqa[3]}*$bgd{$seqa[4]});
96 return $addscore;
97 }
98
99 sub log2{
100 my ($val) = @_;
101 return log($val)/log(2);
102 }