Mercurial > repos > xuebing > splicesite_max_entropy
changeset 12:6e7e036c13ed
Uploaded
author | xuebing |
---|---|
date | Sun, 01 Apr 2012 08:24:09 -0400 |
parents | 7ffb2ec6f557 |
children | c1ff20c4800c |
files | maxent_score5.pl |
diffstat | 1 files changed, 102 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/maxent_score5.pl Sun Apr 01 08:24:09 2012 -0400 @@ -0,0 +1,102 @@ +use strict; + + +my $inputfile = $ARGV[0]; +my $usemaxent = 1; + +my $modelpath = "/home/sharp-galaxy.orig/tools/maxentscan"; +my %me2x5 = &makescorematrix($modelpath.'me2x5'); +my %seq = &makesequencematrix($modelpath.'splicemodels/splice5sequences'); + +my %bgd; +$bgd{'A'} = 0.27; +$bgd{'C'} = 0.23; +$bgd{'G'} = 0.23; +$bgd{'T'} = 0.27; + + + +open (FILE,"<$inputfile") || die "can't open!\n"; + +while(<FILE>) { + chomp; + if (/^\s*$/) { #discard blank lines; + next; + } + elsif (/^>/) { #discard comment lines; + print $_."\t"; + next; + } + elsif (/[NQWERYUIOPLKJHFDSZXVBM]/) { + next; + } + else { + $_ =~ s/\cM//g; #gets rid of carriage return + my $str = $_; + print $str."\t"; + $str = uc($str); + if ($usemaxent) { + print sprintf("%.2f",&log2(&scoreconsensus($str)*$me2x5{$seq{&getrest($str)}}))."\n"; + } + } +} + + +sub makesequencematrix{ + my $file = shift; + my %matrix;my $n=0; + open(SCOREF, $file) || die "Can't open $file!\n"; + while(<SCOREF>) { + chomp; + $_=~ s/\s//; + $matrix{$_} = $n; + $n++; + } + close(SCOREF); + return %matrix; +} +sub makescorematrix{ + my $file = shift; + my %matrix;my $n=0; + open(SCOREF, $file) || die "Can't open $file!\n"; + while(<SCOREF>) { + chomp; + $_=~ s/\s//; + $matrix{$n} = $_; + $n++; + } + close(SCOREF); + return %matrix; +} + +sub getrest{ + my $seq = shift; + my @seqa = split(//,uc($seq)); + return $seqa[0].$seqa[1].$seqa[2].$seqa[5].$seqa[6].$seqa[7].$seqa[8]; +} +sub scoreconsensus{ + my $seq = shift; + my @seqa = split(//,uc($seq)); + my %bgd; + $bgd{'A'} = 0.27; + $bgd{'C'} = 0.23; + $bgd{'G'} = 0.23; + $bgd{'T'} = 0.27; + my %cons1; + $cons1{'A'} = 0.004; + $cons1{'C'} = 0.0032; + $cons1{'G'} = 0.9896; + $cons1{'T'} = 0.0032; + my %cons2; + $cons2{'A'} = 0.0034; + $cons2{'C'} = 0.0039; + $cons2{'G'} = 0.0042; + $cons2{'T'} = 0.9884; + my $addscore = $cons1{$seqa[3]}*$cons2{$seqa[4]}/($bgd{$seqa[3]}*$bgd{$seqa[4]}); + return $addscore; +} + +sub log2{ + my ($val) = @_; + return log($val)/log(2); +}