diff NEUMA-1.2.1/calculate_iEUMA.2.pl @ 0:c44c43d185ef draft default tip

NEUMA-1.2.1 Uploaded
author chawhwa
date Thu, 08 Aug 2013 00:46:13 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/calculate_iEUMA.2.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,42 @@
+#!/usr/bin/perl
+# Compute gEUMA for each gene from the iUtable file and insertlendis file and print out only genes whose gEUMA is larger than 0.
+
+if(@ARGV<3) { print "usage: $0 iUtable_file insertlendis_file READLENGTH\n"; exit; }
+
+my ($iUtable_file,$insertlendis_file,$READLENGTH) = @ARGV;
+
+
+my $totalfreq=0;
+open LENDIS, $insertlendis_file or die "Can't open insertlendis file $insertlendis_file\n";
+while(<LENDIS>){
+ chomp;
+ my ($insert_size, $freq) = split/\t/;
+ $insertlendis{"$insert_size"}=$freq;
+ $totalfreq+=$freq;
+}
+close LENDIS;
+
+
+
+print "gene\tgEUMA\n";  #header
+open IUTABLE, $iUtable_file or die "Can't open iUtable file $iUtable_file\n";
+chomp($header = <IUTABLE>); 
+@distances = split/\t/,$header;
+shift @distances;   #'gene'
+shift @distances;   #'transcript
+while(<IUTABLE>){
+ chomp;
+ my @line = split/\t/;
+ my $gid = shift @line;
+ my $transcript = shift @line;
+ my $gene = "$gid\t$transcript";
+
+ my $EUMA=0;
+ for my $i (0..$#distances){
+    $d=$distances[$i];
+    $pUMA = $line[$i] * ($insertlendis{$d}/$totalfreq) ;
+    $EUMA += $pUMA;
+ }
+ if($EUMA>=0.005) { printf "$gene\t%.2f\n",$EUMA; }  # use >=0.005  instead of >0, because I'm plotting two decimals after point. (in bp)
+}
+close IUTABLE;