view NEUMA-1.2.1/calculate_iEUMA.pl @ 0:c44c43d185ef draft default tip

NEUMA-1.2.1 Uploaded
author chawhwa
date Thu, 08 Aug 2013 00:46:13 -0400
parents
children
line wrap: on
line source

#!/usr/bin/perl
# Compute iEUMA for each isoform from the iUtable file and insertlendis file and print out only isoforms whose iEUMA is larger than 0.

if(@ARGV<2) { print "usage: $0 iUtable_file insertlendis_file READLENGTH\n"; exit; }

my ($iUtable_file,$insertlendis_file,$READLENGTH) = @ARGV;

open IUTABLE, $iUtable_file or die "Can't open iUtable file $iUtable_file\n";
chomp($header = <IUTABLE>); 
@distances = split/\t/,$header;
shift @distances;   #'gene'
while(<IUTABLE>){
 chomp;
 my @line = split/\t/;
 my $gid = shift @line;
 my $transcript = shift @line;
 my $gene = "$gid\t$transcript";
 for my $i (0..$#distances){
   $iUtable{$gene}{"$distances[$i]"}=$line[$i];
 }
}
close IUTABLE;


my $totalfreq=0;
open LENDIS, $insertlendis_file or die "Can't open insertlendis file $insertlendis_file\n";
while(<LENDIS>){
 chomp;
 my ($insert_size, $freq) = split/\t/;
 $insertlendis{$insert_size}=$freq;
 $totalfreq+=$freq;
}
close LENDIS;

print "gene\ttranscript\tiEUMA\n";  #header
for my $gene (sort keys %iUtable){
  my $EUMA=0;
  for my $d (keys %{$iUtable{$gene}}){
    $pUMA = $iUtable{$gene}{$d} * ($insertlendis{$d}/$totalfreq) ;
    $EUMA += $pUMA;
  }
  if($EUMA>=0.005) { printf "$gene\t%.2f\n",$EUMA; }  # use >=0.005  instead of >0, because I'm plotting two decimals after point. (in bp)
}