Mercurial > repos > xuebing > splicesite_max_entropy
comparison maxent_score5.pl @ 12:6e7e036c13ed
Uploaded
author | xuebing |
---|---|
date | Sun, 01 Apr 2012 08:24:09 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
11:7ffb2ec6f557 | 12:6e7e036c13ed |
---|---|
1 use strict; | |
2 | |
3 | |
4 my $inputfile = $ARGV[0]; | |
5 my $usemaxent = 1; | |
6 | |
7 my $modelpath = "/home/sharp-galaxy.orig/tools/maxentscan"; | |
8 my %me2x5 = &makescorematrix($modelpath.'me2x5'); | |
9 my %seq = &makesequencematrix($modelpath.'splicemodels/splice5sequences'); | |
10 | |
11 my %bgd; | |
12 $bgd{'A'} = 0.27; | |
13 $bgd{'C'} = 0.23; | |
14 $bgd{'G'} = 0.23; | |
15 $bgd{'T'} = 0.27; | |
16 | |
17 | |
18 | |
19 open (FILE,"<$inputfile") || die "can't open!\n"; | |
20 | |
21 while(<FILE>) { | |
22 chomp; | |
23 if (/^\s*$/) { #discard blank lines; | |
24 next; | |
25 } | |
26 elsif (/^>/) { #discard comment lines; | |
27 print $_."\t"; | |
28 next; | |
29 } | |
30 elsif (/[NQWERYUIOPLKJHFDSZXVBM]/) { | |
31 next; | |
32 } | |
33 else { | |
34 $_ =~ s/\cM//g; #gets rid of carriage return | |
35 my $str = $_; | |
36 print $str."\t"; | |
37 $str = uc($str); | |
38 if ($usemaxent) { | |
39 print sprintf("%.2f",&log2(&scoreconsensus($str)*$me2x5{$seq{&getrest($str)}}))."\n"; | |
40 } | |
41 } | |
42 } | |
43 | |
44 | |
45 sub makesequencematrix{ | |
46 my $file = shift; | |
47 my %matrix;my $n=0; | |
48 open(SCOREF, $file) || die "Can't open $file!\n"; | |
49 while(<SCOREF>) { | |
50 chomp; | |
51 $_=~ s/\s//; | |
52 $matrix{$_} = $n; | |
53 $n++; | |
54 } | |
55 close(SCOREF); | |
56 return %matrix; | |
57 } | |
58 sub makescorematrix{ | |
59 my $file = shift; | |
60 my %matrix;my $n=0; | |
61 open(SCOREF, $file) || die "Can't open $file!\n"; | |
62 while(<SCOREF>) { | |
63 chomp; | |
64 $_=~ s/\s//; | |
65 $matrix{$n} = $_; | |
66 $n++; | |
67 } | |
68 close(SCOREF); | |
69 return %matrix; | |
70 } | |
71 | |
72 sub getrest{ | |
73 my $seq = shift; | |
74 my @seqa = split(//,uc($seq)); | |
75 return $seqa[0].$seqa[1].$seqa[2].$seqa[5].$seqa[6].$seqa[7].$seqa[8]; | |
76 } | |
77 sub scoreconsensus{ | |
78 my $seq = shift; | |
79 my @seqa = split(//,uc($seq)); | |
80 my %bgd; | |
81 $bgd{'A'} = 0.27; | |
82 $bgd{'C'} = 0.23; | |
83 $bgd{'G'} = 0.23; | |
84 $bgd{'T'} = 0.27; | |
85 my %cons1; | |
86 $cons1{'A'} = 0.004; | |
87 $cons1{'C'} = 0.0032; | |
88 $cons1{'G'} = 0.9896; | |
89 $cons1{'T'} = 0.0032; | |
90 my %cons2; | |
91 $cons2{'A'} = 0.0034; | |
92 $cons2{'C'} = 0.0039; | |
93 $cons2{'G'} = 0.0042; | |
94 $cons2{'T'} = 0.9884; | |
95 my $addscore = $cons1{$seqa[3]}*$cons2{$seqa[4]}/($bgd{$seqa[3]}*$bgd{$seqa[4]}); | |
96 return $addscore; | |
97 } | |
98 | |
99 sub log2{ | |
100 my ($val) = @_; | |
101 return log($val)/log(2); | |
102 } |